mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-30 22:47:16 +08:00
mdbx: refine mdbx.h and error strings (1 of 5).
Change-Id: Iaa6c4afeb9268d916ef1e8e23fdf12b7f603869f
This commit is contained in:
parent
eb4159ac88
commit
1d08f9e673
352
mdbx.h
352
mdbx.h
@ -1,4 +1,190 @@
|
|||||||
/* LICENSE AND COPYRUSTING *****************************************************
|
/**** BRIEFLY ******************************************************************
|
||||||
|
*
|
||||||
|
* libmdbx is superior to LMDB in terms of features and reliability,
|
||||||
|
* not inferior in performance. libmdbx works on Linux, FreeBSD, MacOS X
|
||||||
|
* and other systems compliant with POSIX.1-2008, but also supports
|
||||||
|
* Windows as a complementary platform.
|
||||||
|
*
|
||||||
|
* Look below for API description, for other information (build, embedding and
|
||||||
|
* amalgamation, improvements over LMDB, benchmarking, etc) please refer to
|
||||||
|
* README.md at https://abf.io/erthink/libmdbx.
|
||||||
|
*
|
||||||
|
* ---
|
||||||
|
*
|
||||||
|
* The next version is under active non-public development and will be released
|
||||||
|
* as MithrilDB and libmithrildb for libraries & packages. Admittedly mythical
|
||||||
|
* Mithril is resembling silver but being stronger and lighter than steel.
|
||||||
|
* Therefore MithrilDB is a rightly relevant name.
|
||||||
|
*
|
||||||
|
* MithrilDB will be radically different from libmdbx by the new database format
|
||||||
|
* and API based on C++17, as well as the Apache 2.0 License. The goal of this
|
||||||
|
* revolution is to provide a clearer and robust API, add more features and new
|
||||||
|
* valuable properties of database.
|
||||||
|
*
|
||||||
|
* The Future will (be) Positive. Всё будет хорошо.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
**** INTRODUCTION *************************************************************
|
||||||
|
*
|
||||||
|
* // For the most part, this section is a copy of the corresponding text
|
||||||
|
* // from LMDB description, but with some edits reflecting the improvements
|
||||||
|
* // and enhancements made in MDBX.
|
||||||
|
*
|
||||||
|
* MDBX is a Btree-based database management library modeled loosely on the
|
||||||
|
* BerkeleyDB API, but much simplified. The entire database (aka "environment")
|
||||||
|
* is exposed in a memory map, and all data fetches return data directly from
|
||||||
|
* the mapped memory, so no malloc's or memcpy's occur during data fetches.
|
||||||
|
* As such, the library is extremely simple because it requires no page caching
|
||||||
|
* layer of its own, and it is extremely high performance and memory-efficient.
|
||||||
|
* It is also fully transactional with full ACID semantics, and when the memory
|
||||||
|
* map is read-only, the database integrity cannot be corrupted by stray pointer
|
||||||
|
* writes from application code.
|
||||||
|
*
|
||||||
|
* The library is fully thread-aware and supports concurrent read/write access
|
||||||
|
* from multiple processes and threads. Data pages use a copy-on-write strategy
|
||||||
|
* so no active data pages are ever overwritten, which also provides resistance
|
||||||
|
* to corruption and eliminates the need of any special recovery procedures
|
||||||
|
* after a system crash. Writes are fully serialized; only one write transaction
|
||||||
|
* may be active at a time, which guarantees that writers can never deadlock.
|
||||||
|
* The database structure is multi-versioned so readers run with no locks;
|
||||||
|
* writers cannot block readers, and readers don't block writers.
|
||||||
|
*
|
||||||
|
* Unlike other well-known database mechanisms which use either write-ahead
|
||||||
|
* transaction logs or append-only data writes, MDBX requires no maintenance
|
||||||
|
* during operation. Both write-ahead loggers and append-only databases require
|
||||||
|
* periodic checkpointing and/or compaction of their log or database files
|
||||||
|
* otherwise they grow without bound. MDBX tracks free pages within the database
|
||||||
|
* and re-uses them for new write operations, so the database size does not grow
|
||||||
|
* without bound in normal use. It is worth noting that the "next" version of
|
||||||
|
* libmdbx (MithrilDB) will solve this problem.
|
||||||
|
*
|
||||||
|
* The memory map can be used as a read-only or read-write map. It is read-only
|
||||||
|
* by default as this provides total immunity to corruption. Using read-write
|
||||||
|
* mode offers much higher write performance, but adds the possibility for stray
|
||||||
|
* application writes thru pointers to silently corrupt the database.
|
||||||
|
* Of course if your application code is known to be bug-free (...) then this is
|
||||||
|
* not an issue.
|
||||||
|
*
|
||||||
|
* If this is your first time using a transactional embedded key/value store,
|
||||||
|
* you may find the "GETTING STARTED" section below to be helpful.
|
||||||
|
*
|
||||||
|
* ---
|
||||||
|
* Restrictions and Caveats (in addition to those listed for some functions):
|
||||||
|
*
|
||||||
|
* - Troubleshooting the LCK-file.
|
||||||
|
* 1. A broken LCK-file can cause sync issues, including appearance of
|
||||||
|
* wrong/inconsistent data for readers. When database opened in the
|
||||||
|
* cooperative read-write mode the LCK-file requires to be mapped to
|
||||||
|
* memory in read-write access. In this case it is always possible for
|
||||||
|
* a stray/malfunctioning application to write thru pointers and
|
||||||
|
* silently corrupt the LCK-file.
|
||||||
|
*
|
||||||
|
* Unfortunately, there is no portable way to prevent such
|
||||||
|
* corruption, since the LCK-file is updated concurrently by
|
||||||
|
* multiple processes in a lock-free manner and any locking is
|
||||||
|
* unwise due to a large overhead.
|
||||||
|
*
|
||||||
|
* The "next" version of libmdbx (MithrilDB) will solve this issue.
|
||||||
|
*
|
||||||
|
* Workaround: Just make all programs using the database close it;
|
||||||
|
* the LCK-file is always reset on first open.
|
||||||
|
*
|
||||||
|
* 2. Stale reader transactions left behind by an aborted program cause
|
||||||
|
* further writes to grow the database quickly, and stale locks can
|
||||||
|
* block further operation.
|
||||||
|
* MDBX checks for stale readers while opening environment and before
|
||||||
|
* growing the database. But in some cases, this may not be enough.
|
||||||
|
*
|
||||||
|
* Workaround: Check for stale readers periodically, using the
|
||||||
|
* mdbx_reader_check() function or the mdbx_stat tool.
|
||||||
|
*
|
||||||
|
* 3. Stale writers will be cleared automatically by MDBX on supported
|
||||||
|
* platforms. But this is platform-specific, depending especially on the
|
||||||
|
* implementation of shared POSIX-mutexes and support for robust
|
||||||
|
* mutexes. For instance there are no known issues on Linux, OSX,
|
||||||
|
* Windows and FreeBSD.
|
||||||
|
*
|
||||||
|
* Workaround: Otherwise just make all programs using the database
|
||||||
|
* close it; the LCK-file is always reset on first open
|
||||||
|
* of the environment.
|
||||||
|
*
|
||||||
|
* - Do not use MDBX databases on remote filesystems, even between processes
|
||||||
|
* on the same host. This breaks file locks on some platforms, possibly
|
||||||
|
* memory map sync, and certainly sync between programs on different hosts.
|
||||||
|
*
|
||||||
|
* On the other hand, MDBX supports the exclusive database operation over
|
||||||
|
* a network, and cooperative read-only access to the database placed on
|
||||||
|
* a read-only network share.
|
||||||
|
*
|
||||||
|
* - There is no pure read-only mode in the usual explicit sense, since
|
||||||
|
* readers need write access to the LCK-file to be visible to the writer.
|
||||||
|
* MDBX always tries to open/create LCK-file for read-write, but switches
|
||||||
|
* to without-LCK mode on appropriate errors (EROFS, EACCES, EPERM)
|
||||||
|
* if the read-only mode was requested by the MDBX_RDONLY flag which is
|
||||||
|
* described below.
|
||||||
|
*
|
||||||
|
* The "next" version of libmdbx (MithrilDB) will solve this issue.
|
||||||
|
*
|
||||||
|
* - A thread can only use one transaction at a time, plus any nested
|
||||||
|
* read-write transactions in the non-writemap mode. Each transaction
|
||||||
|
* belongs to one thread. The MDBX_NOTLS flag changes this for read-only
|
||||||
|
* transactions. See below.
|
||||||
|
*
|
||||||
|
* - MDBX_env instance(s) should not be used in child processes after fork().
|
||||||
|
* It would be insane to call fork() and any MDBX-functions simultaneously
|
||||||
|
* from multiple threads. The best way is to prevent the presence of open
|
||||||
|
* MDBX-instances during fork().
|
||||||
|
*
|
||||||
|
* The MDBX_TXN_CHECKPID build-time option, which is ON by default on
|
||||||
|
* non-Windows platforms (i.e. where fork() is available), enables PID
|
||||||
|
* checking at a few critical points. But this does not give any guarantees,
|
||||||
|
* but only allows you to detect such errors a little sooner. Depending on
|
||||||
|
* the platform, you should expect an application crash and/or database
|
||||||
|
* corruption in such cases.
|
||||||
|
*
|
||||||
|
* On the other hand, MDBX allows calling mdbx_env_close() in such cases to
|
||||||
|
* release resources, but no more and in general this is a wrong way.
|
||||||
|
*
|
||||||
|
* - Do not have open an MDBX database twice in the same process at
|
||||||
|
* the same time. Not even from a plain open() call - close()ing it
|
||||||
|
* breaks POSIX's fcntl() advisory locking. It is OK to reopen it after
|
||||||
|
* fork() or exec(), since the opened files have FD_CLOEXEC set.
|
||||||
|
*
|
||||||
|
* Unlike the LMDB, the MDBX uses the "Open file description" locks (aka
|
||||||
|
* OFD-locks) when they are available, also performing additional checks against
|
||||||
|
* double-opening.
|
||||||
|
*
|
||||||
|
* - Avoid long-lived transactions, especially in the scenarios with a high
|
||||||
|
* rate of write transactions. Read transactions prevent reuse of pages
|
||||||
|
* freed by newer write transactions, thus the database can grow quickly.
|
||||||
|
* Write transactions prevent other write transactions, since writes are
|
||||||
|
* serialized.
|
||||||
|
*
|
||||||
|
* The "next" version of libmdbx (MithrilDB) will solve this issue
|
||||||
|
* for read-only transactions.
|
||||||
|
*
|
||||||
|
* - Avoid suspending a process with active transactions. These would then be
|
||||||
|
* "long-lived" as above.
|
||||||
|
*
|
||||||
|
* The "next" version of libmdbx (MithrilDB) will solve this issue.
|
||||||
|
*
|
||||||
|
* - Avoid aborting a process with an active read-only transaction in scenarios
|
||||||
|
* with high rate of write transactions. The transaction becomes "long-lived"
|
||||||
|
* as above until a check for stale readers is performed or the LCK-file is
|
||||||
|
* reset, since the process may not remove it from the lockfile. This does
|
||||||
|
* not apply to write transactions if the system clears stale writers, see
|
||||||
|
* above.
|
||||||
|
*
|
||||||
|
* - An MDBX database configuration will often reserve considerable unused
|
||||||
|
* memory address space and maybe file size for future growth. This does
|
||||||
|
* not use actual memory or disk space, but users may need to understand
|
||||||
|
* the difference so they won't be scared off.
|
||||||
|
*
|
||||||
|
**** GETTING STARTED **********************************************************
|
||||||
|
*
|
||||||
|
* TBD
|
||||||
|
*
|
||||||
|
**** LICENSE AND COPYRUSTING **************************************************
|
||||||
*
|
*
|
||||||
* Copyright 2015-2019 Leonid Yuriev <leo@yuriev.ru>
|
* Copyright 2015-2019 Leonid Yuriev <leo@yuriev.ru>
|
||||||
* and other libmdbx authors: please see AUTHORS file.
|
* and other libmdbx authors: please see AUTHORS file.
|
||||||
@ -44,41 +230,23 @@
|
|||||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
*
|
||||||
/* ACKNOWLEDGEMENTS ************************************************************
|
*
|
||||||
|
**** ACKNOWLEDGEMENTS *********************************************************
|
||||||
*
|
*
|
||||||
* Howard Chu (Symas Corporation) - the author of LMDB,
|
* Howard Chu (Symas Corporation) - the author of LMDB,
|
||||||
* from which originated the MDBX in 2015.
|
* from which originated the MDBX in 2015.
|
||||||
*
|
*
|
||||||
* Martin Hedenfalk <martin@bzero.se> - the author of `btree.c` code,
|
* Martin Hedenfalk <martin@bzero.se> - the author of `btree.c` code,
|
||||||
* which was used for begin development of LMDB. */
|
* which was used for begin development of LMDB.
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
#ifndef LIBMDBX_H
|
#ifndef LIBMDBX_H
|
||||||
#define LIBMDBX_H
|
#define LIBMDBX_H
|
||||||
|
|
||||||
/*******************************************************************************
|
|
||||||
*
|
|
||||||
* libmdbx is superior to LMDB in terms of features and reliability, not
|
|
||||||
* inferior in performance. libmdbx works on Linux, FreeBSD, MacOS X and other
|
|
||||||
* systems compliant with POSIX.1-2008, but also support Windows as a
|
|
||||||
* complementary platform.
|
|
||||||
*
|
|
||||||
* The next version is under active non-public development and will be released
|
|
||||||
* as MithrilDB and libmithrildb for libraries & packages. Admittedly mythical
|
|
||||||
* Mithril is resembling silver but being stronger and lighter than steel.
|
|
||||||
* Therefore MithrilDB is rightly relevant name.
|
|
||||||
*
|
|
||||||
* MithrilDB will be radically different from libmdbx by the new database format
|
|
||||||
* and API based on C++17, as well as the Apache 2.0 License. The goal of this
|
|
||||||
* revolution is to provide a clearer and robust API, add more features and new
|
|
||||||
* valuable properties of database.
|
|
||||||
*
|
|
||||||
* The Future will (be) Positive. Всё будет хорошо.
|
|
||||||
*
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#pragma warning(push, 1)
|
#pragma warning(push, 1)
|
||||||
#pragma warning(disable : 4548) /* expression before comma has no effect; \
|
#pragma warning(disable : 4548) /* expression before comma has no effect; \
|
||||||
@ -201,6 +369,7 @@ typedef pthread_t mdbx_tid_t;
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* MDBX version information. */
|
||||||
typedef struct mdbx_version_info {
|
typedef struct mdbx_version_info {
|
||||||
uint8_t major;
|
uint8_t major;
|
||||||
uint8_t minor;
|
uint8_t minor;
|
||||||
@ -214,7 +383,11 @@ typedef struct mdbx_version_info {
|
|||||||
} git;
|
} git;
|
||||||
const char *sourcery /* sourcery anchor for pinning */;
|
const char *sourcery /* sourcery anchor for pinning */;
|
||||||
} mdbx_version_info;
|
} mdbx_version_info;
|
||||||
|
extern LIBMDBX_API const mdbx_version_info mdbx_version;
|
||||||
|
|
||||||
|
/* MDBX build information.
|
||||||
|
* WARNING: Some strings could be NULL in case no corresponding information was
|
||||||
|
* provided at build time (i.e. flags). */
|
||||||
typedef struct mdbx_build_info {
|
typedef struct mdbx_build_info {
|
||||||
const char *datetime /* build timestamp (ISO-8601 or __DATE__ __TIME__) */;
|
const char *datetime /* build timestamp (ISO-8601 or __DATE__ __TIME__) */;
|
||||||
const char *target /* cpu/arch-system-config triplet */;
|
const char *target /* cpu/arch-system-config triplet */;
|
||||||
@ -223,22 +396,48 @@ typedef struct mdbx_build_info {
|
|||||||
const char *flags /* CFLAGS */;
|
const char *flags /* CFLAGS */;
|
||||||
} mdbx_build_info;
|
} mdbx_build_info;
|
||||||
|
|
||||||
extern LIBMDBX_API const mdbx_version_info mdbx_version;
|
|
||||||
extern LIBMDBX_API const mdbx_build_info mdbx_build;
|
extern LIBMDBX_API const mdbx_build_info mdbx_build;
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
#if !MDBX_BUILD_SHARED_LIBRARY
|
#if !MDBX_BUILD_SHARED_LIBRARY
|
||||||
|
|
||||||
/* Dll initialization callback for ability to dynamically load MDBX DLL by
|
/* MDBX internally uses global and thread local storage destructors to
|
||||||
* LoadLibrary() on Windows versions before Windows Vista. This function MUST be
|
* perform automatic (de)initialization, releasing reader lock table slots
|
||||||
* called once from DllMain() for each reason (DLL_PROCESS_ATTACH,
|
* and so on.
|
||||||
* DLL_PROCESS_DETACH, DLL_THREAD_ATTACH and DLL_THREAD_DETACH). Do this
|
*
|
||||||
* carefully and ONLY when actual Windows version don't support initialization
|
* If MDBX is built as a DLL, this is done out-of-the-box by the DllEntry() function,
|
||||||
* via "TLS Directory" (e.g .CRT$XL[A-Z] sections in executable or dll file). */
|
* which is called automatically by the Windows core, passing the corresponding reason
|
||||||
|
* argument.
|
||||||
|
*
|
||||||
|
* Otherwise, if MDBX was not built as a DLL, some black magic
|
||||||
|
* may be required depending on the Windows version:
|
||||||
|
* - Modern Windows versions, including Windows Vista and later, provide
|
||||||
|
* support for "TLS Directory" (e.g .CRT$XL[A-Z] sections in executable
|
||||||
|
* or dll file). In this case, MDBX is capable of doing everything automatically,
|
||||||
|
* and you do not need to call mdbx_dll_callback().
|
||||||
|
* - Obsolete versions of Windows, prior to Windows Vista, REQUIRE calling
|
||||||
|
* mdbx_dll_callback() manually from corresponding DllMain() or WinMain()
|
||||||
|
* of your DLL or application.
|
||||||
|
* - This behavior is under control of the MDBX_CONFIG_MANUAL_TLS_CALLBACK
|
||||||
|
* option, which is determined by default according to the target version
|
||||||
|
* of Windows at build time.
|
||||||
|
* But you may override MDBX_CONFIG_MANUAL_TLS_CALLBACK in special cases.
|
||||||
|
*
|
||||||
|
* Therefore, building MDBX as a DLL is recommended for all versions of Windows.
|
||||||
|
* So, if in doubt, just build MDBX as a separate DLL and don't worry. */
|
||||||
|
|
||||||
#ifndef MDBX_CONFIG_MANUAL_TLS_CALLBACK
|
#ifndef MDBX_CONFIG_MANUAL_TLS_CALLBACK
|
||||||
|
#if defined(_WIN32_WINNT_VISTA) && WINVER >= _WIN32_WINNT_VISTA
|
||||||
|
/* As described above mdbx_dll_callback() is NOT needed for Windows Vista
|
||||||
|
* and later. */
|
||||||
#define MDBX_CONFIG_MANUAL_TLS_CALLBACK 0
|
#define MDBX_CONFIG_MANUAL_TLS_CALLBACK 0
|
||||||
|
#else
|
||||||
|
/* As described above mdbx_dll_callback() IS REQUIRED for Windows versions
|
||||||
|
* prior to Windows Vista. */
|
||||||
|
#define MDBX_CONFIG_MANUAL_TLS_CALLBACK 1
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* MDBX_CONFIG_MANUAL_TLS_CALLBACK */
|
||||||
|
|
||||||
#if MDBX_CONFIG_MANUAL_TLS_CALLBACK
|
#if MDBX_CONFIG_MANUAL_TLS_CALLBACK
|
||||||
void LIBMDBX_API NTAPI mdbx_dll_callback(PVOID module, DWORD reason,
|
void LIBMDBX_API NTAPI mdbx_dll_callback(PVOID module, DWORD reason,
|
||||||
PVOID reserved);
|
PVOID reserved);
|
||||||
@ -246,12 +445,7 @@ void LIBMDBX_API NTAPI mdbx_dll_callback(PVOID module, DWORD reason,
|
|||||||
#endif /* !MDBX_BUILD_SHARED_LIBRARY */
|
#endif /* !MDBX_BUILD_SHARED_LIBRARY */
|
||||||
#endif /* Windows */
|
#endif /* Windows */
|
||||||
|
|
||||||
/* The name of the lock file in the DB environment */
|
/**** TOP-LEVEL STRUCTURES ****************************************************/
|
||||||
#define MDBX_LOCKNAME "/mdbx.lck"
|
|
||||||
/* The name of the data file in the DB environment */
|
|
||||||
#define MDBX_DATANAME "/mdbx.dat"
|
|
||||||
/* The suffix of the lock file when no subdir is used */
|
|
||||||
#define MDBX_LOCK_SUFFIX "-lck"
|
|
||||||
|
|
||||||
/* Opaque structure for a database environment.
|
/* Opaque structure for a database environment.
|
||||||
*
|
*
|
||||||
@ -265,8 +459,11 @@ typedef struct MDBX_env MDBX_env;
|
|||||||
* read-only or read-write. */
|
* read-only or read-write. */
|
||||||
typedef struct MDBX_txn MDBX_txn;
|
typedef struct MDBX_txn MDBX_txn;
|
||||||
|
|
||||||
/* A handle for an individual database in the DB environment. */
|
/* A handle for an individual database (key-value space) in the DB environment.
|
||||||
|
* Zero handle is used internally (hidden Garbage Collection DB).
|
||||||
|
* So, any valid DBI-handle is greater than 0 and less than or equal to MDBX_MAX_DBI. */
|
||||||
typedef uint32_t MDBX_dbi;
|
typedef uint32_t MDBX_dbi;
|
||||||
|
#define MDBX_MAX_DBI UINT32_C(32765)
|
||||||
|
|
||||||
/* Opaque structure for navigating through a database */
|
/* Opaque structure for navigating through a database */
|
||||||
typedef struct MDBX_cursor MDBX_cursor;
|
typedef struct MDBX_cursor MDBX_cursor;
|
||||||
@ -298,23 +495,75 @@ typedef struct iovec MDBX_val;
|
|||||||
/* A callback function used to compare two keys in a database */
|
/* A callback function used to compare two keys in a database */
|
||||||
typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
|
typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
|
||||||
|
|
||||||
/* Environment Flags */
|
/**** THE FILES ****************************************************************
|
||||||
/* no environment directory */
|
* At the file system level, the environment corresponds to a pair of files.
|
||||||
|
* */
|
||||||
|
|
||||||
|
/* The name of the lock file in the DB environment */
|
||||||
|
#define MDBX_LOCKNAME "/mdbx.lck"
|
||||||
|
/* The name of the data file in the DB environment */
|
||||||
|
#define MDBX_DATANAME "/mdbx.dat"
|
||||||
|
|
||||||
|
/* The suffix of the lock file when MDBX_NOSUBDIR is used */
|
||||||
|
#define MDBX_LOCK_SUFFIX "-lck"
|
||||||
|
|
||||||
|
/**** Environment Flags *******************************************************/
|
||||||
|
|
||||||
|
/* MDBX_NOSUBDIR = no environment directory.
|
||||||
|
*
|
||||||
|
* - with MDBX_NOSUBDIR = in a filesystem we have the pair of MDBX-files which
|
||||||
|
* names derived from given pathname by appending predefined suffixes.
|
||||||
|
*
|
||||||
|
* - without MDBX_NOSUBDIR = in a filesystem we have the MDBX-directory with
|
||||||
|
* given pathname, within that a pair of MDBX-files with predefined names. */
|
||||||
#define MDBX_NOSUBDIR 0x4000u
|
#define MDBX_NOSUBDIR 0x4000u
|
||||||
/* don't fsync after commit */
|
|
||||||
#define MDBX_NOSYNC 0x10000u
|
/* MDBX_RDONLY = read only mode.
|
||||||
/* read only */
|
* - with MDBX_RDONLY = open environment in read-only mode.
|
||||||
|
*
|
||||||
|
* - without MDBX_RDONLY = open environment in read-write mode. */
|
||||||
#define MDBX_RDONLY 0x20000u
|
#define MDBX_RDONLY 0x20000u
|
||||||
/* don't fsync metapage after commit */
|
|
||||||
#define MDBX_NOMETASYNC 0x40000u
|
/* MDBX_EXCLUSIVE = open DB in exclusive/monopolistic mode.
|
||||||
|
*
|
||||||
|
* - with MDBX_EXCLUSIVE = open environment in exclusive/monopolistic mode
|
||||||
|
* or return MDBX_BUSY if environment already used by other process.
|
||||||
|
* The main feature of the exclusive mode is the ability to open the
|
||||||
|
* environment placed on a network share.
|
||||||
|
*
|
||||||
|
* - without MDBX_EXCLUSIVE = open environment in cooperative mode,
|
||||||
|
* i.e. for multi-process access/interaction/cooperation.
|
||||||
|
* The main requirements of the cooperative mode are:
|
||||||
|
* 1. data files MUST be placed in the LOCAL file system,
|
||||||
|
* but NOT on a network share.
|
||||||
|
* 2. environment MUST be opened only by LOCAL processes,
|
||||||
|
* but NOT over a network.
|
||||||
|
* 3. OS kernel (i.e. file system and memory mapping implementation) and
|
||||||
|
* all processes that open the given environment MUST be running
|
||||||
|
* in the physically single RAM with cache-coherency. The only
|
||||||
|
* exception for cache-consistency requirement is Linux on MIPS
|
||||||
|
* architecture, but this case has not been tested for a long time. */
|
||||||
|
#define MDBX_EXCLUSIVE 0x400000u
|
||||||
|
|
||||||
/* use writable mmap */
|
/* use writable mmap */
|
||||||
#define MDBX_WRITEMAP 0x80000u
|
#define MDBX_WRITEMAP 0x80000u
|
||||||
/* use asynchronous msync when MDBX_WRITEMAP is used */
|
/* use asynchronous msync when MDBX_WRITEMAP is used */
|
||||||
#define MDBX_MAPASYNC 0x100000u
|
#define MDBX_MAPASYNC 0x100000u
|
||||||
|
|
||||||
|
/* MDBX_NOSYNC = don't sync data to persistent storage (e.g. disk)
|
||||||
|
* at the end of transaction commit.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#define MDBX_NOSYNC 0x10000u
|
||||||
|
|
||||||
|
/* don't fsync metapage after commit */
|
||||||
|
#define MDBX_NOMETASYNC 0x40000u
|
||||||
|
|
||||||
|
/* make a steady-sync only on close and explicit env-sync */
|
||||||
|
#define MDBX_UTTERLY_NOSYNC (MDBX_NOSYNC | MDBX_MAPASYNC)
|
||||||
|
|
||||||
/* tie reader locktable slots to MDBX_txn objects instead of to threads */
|
/* tie reader locktable slots to MDBX_txn objects instead of to threads */
|
||||||
#define MDBX_NOTLS 0x200000u
|
#define MDBX_NOTLS 0x200000u
|
||||||
/* open DB in exclusive/monopolistic mode. */
|
|
||||||
#define MDBX_EXCLUSIVE 0x400000u
|
|
||||||
/* don't do readahead */
|
/* don't do readahead */
|
||||||
#define MDBX_NORDAHEAD 0x800000u
|
#define MDBX_NORDAHEAD 0x800000u
|
||||||
/* don't initialize malloc'd memory before writing to datafile */
|
/* don't initialize malloc'd memory before writing to datafile */
|
||||||
@ -323,8 +572,6 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
|
|||||||
#define MDBX_COALESCE 0x2000000u
|
#define MDBX_COALESCE 0x2000000u
|
||||||
/* LIFO policy for reclaiming FreeDB records */
|
/* LIFO policy for reclaiming FreeDB records */
|
||||||
#define MDBX_LIFORECLAIM 0x4000000u
|
#define MDBX_LIFORECLAIM 0x4000000u
|
||||||
/* make a steady-sync only on close and explicit env-sync */
|
|
||||||
#define MDBX_UTTERLY_NOSYNC (MDBX_NOSYNC | MDBX_MAPASYNC)
|
|
||||||
/* debuging option, fill/perturb released pages */
|
/* debuging option, fill/perturb released pages */
|
||||||
#define MDBX_PAGEPERTURB 0x8000000u
|
#define MDBX_PAGEPERTURB 0x8000000u
|
||||||
|
|
||||||
@ -463,7 +710,8 @@ typedef enum MDBX_cursor_op {
|
|||||||
#define MDBX_BAD_DBI (-30780)
|
#define MDBX_BAD_DBI (-30780)
|
||||||
/* Unexpected problem - txn should abort */
|
/* Unexpected problem - txn should abort */
|
||||||
#define MDBX_PROBLEM (-30779)
|
#define MDBX_PROBLEM (-30779)
|
||||||
/* Another write transaction is running */
|
/* Another write transaction is running or environment is already used while
|
||||||
|
* opening with MDBX_EXCLUSIVE flag */
|
||||||
#define MDBX_BUSY (-30778)
|
#define MDBX_BUSY (-30778)
|
||||||
/* The last defined error code */
|
/* The last defined error code */
|
||||||
#define MDBX_LAST_ERRCODE MDBX_BUSY
|
#define MDBX_LAST_ERRCODE MDBX_BUSY
|
||||||
|
@ -1418,8 +1418,8 @@ static const char *__mdbx_strerr(int errnum) {
|
|||||||
"MDBX_DBS_FULL: Too may DBI (maxdbs reached)",
|
"MDBX_DBS_FULL: Too may DBI (maxdbs reached)",
|
||||||
"MDBX_READERS_FULL: Too many readers (maxreaders reached)",
|
"MDBX_READERS_FULL: Too many readers (maxreaders reached)",
|
||||||
NULL /* MDBX_TLS_FULL (-30789): unused in MDBX */,
|
NULL /* MDBX_TLS_FULL (-30789): unused in MDBX */,
|
||||||
"MDBX_TXN_FULL: Transaction has too many dirty pages - transaction too "
|
"MDBX_TXN_FULL: Transaction has too many dirty pages, "
|
||||||
"big",
|
"i.e transaction too big",
|
||||||
"MDBX_CURSOR_FULL: Internal error - cursor stack limit reached",
|
"MDBX_CURSOR_FULL: Internal error - cursor stack limit reached",
|
||||||
"MDBX_PAGE_FULL: Internal error - page has no more space",
|
"MDBX_PAGE_FULL: Internal error - page has no more space",
|
||||||
"MDBX_MAP_RESIZED: Database contents grew beyond environment mapsize",
|
"MDBX_MAP_RESIZED: Database contents grew beyond environment mapsize",
|
||||||
@ -1430,7 +1430,8 @@ static const char *__mdbx_strerr(int errnum) {
|
|||||||
"DUPFIXED size",
|
"DUPFIXED size",
|
||||||
"MDBX_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
|
"MDBX_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
|
||||||
"MDBX_PROBLEM: Unexpected problem - txn should abort",
|
"MDBX_PROBLEM: Unexpected problem - txn should abort",
|
||||||
"MDBX_BUSY: Another write transaction is started",
|
"MDBX_BUSY: Another write transaction is running or "
|
||||||
|
"environment is already used while opening with MDBX_EXCLUSIVE flag",
|
||||||
};
|
};
|
||||||
|
|
||||||
if (errnum >= MDBX_KEYEXIST && errnum <= MDBX_LAST_ERRCODE) {
|
if (errnum >= MDBX_KEYEXIST && errnum <= MDBX_LAST_ERRCODE) {
|
||||||
@ -1861,10 +1862,10 @@ static int mdbx_page_befree(MDBX_cursor *mc, MDBX_page *mp) {
|
|||||||
/* Loosen or free a single page.
|
/* Loosen or free a single page.
|
||||||
*
|
*
|
||||||
* Saves single pages to a list for future reuse
|
* Saves single pages to a list for future reuse
|
||||||
* in this same txn. It has been pulled from the freeDB
|
* in this same txn. It has been pulled from the GC
|
||||||
* and already resides on the dirty list, but has been
|
* and already resides on the dirty list, but has been
|
||||||
* deleted. Use these pages first before pulling again
|
* deleted. Use these pages first before pulling again
|
||||||
* from the freeDB.
|
* from the GC.
|
||||||
*
|
*
|
||||||
* If the page wasn't dirtied in this txn, just add it
|
* If the page wasn't dirtied in this txn, just add it
|
||||||
* to this txn's free list. */
|
* to this txn's free list. */
|
||||||
@ -2529,7 +2530,7 @@ bailout:
|
|||||||
*
|
*
|
||||||
* If there are free pages available from older transactions, they
|
* If there are free pages available from older transactions, they
|
||||||
* are re-used first. Otherwise allocate a new page at mt_next_pgno.
|
* are re-used first. Otherwise allocate a new page at mt_next_pgno.
|
||||||
* Do not modify the freedB, just merge freeDB records into me_reclaimed_pglist
|
* Do not modify the GC, just merge GC records into me_reclaimed_pglist
|
||||||
* and move me_last_reclaimed to say which records were consumed. Only this
|
* and move me_last_reclaimed to say which records were consumed. Only this
|
||||||
* function can create me_reclaimed_pglist and move
|
* function can create me_reclaimed_pglist and move
|
||||||
* me_last_reclaimed/mt_next_pgno.
|
* me_last_reclaimed/mt_next_pgno.
|
||||||
@ -2559,7 +2560,7 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
|
|||||||
if (likely(flags & MDBX_ALLOC_GC)) {
|
if (likely(flags & MDBX_ALLOC_GC)) {
|
||||||
flags |= env->me_flags & (MDBX_COALESCE | MDBX_LIFORECLAIM);
|
flags |= env->me_flags & (MDBX_COALESCE | MDBX_LIFORECLAIM);
|
||||||
if (unlikely(mc->mc_flags & C_RECLAIMING)) {
|
if (unlikely(mc->mc_flags & C_RECLAIMING)) {
|
||||||
/* If mc is updating the freeDB, then the befree-list cannot play
|
/* If mc is updating the GC, then the befree-list cannot play
|
||||||
* catch-up with itself by growing while trying to save it. */
|
* catch-up with itself by growing while trying to save it. */
|
||||||
flags &=
|
flags &=
|
||||||
~(MDBX_ALLOC_GC | MDBX_ALLOC_KICK | MDBX_COALESCE | MDBX_LIFORECLAIM);
|
~(MDBX_ALLOC_GC | MDBX_ALLOC_KICK | MDBX_COALESCE | MDBX_LIFORECLAIM);
|
||||||
@ -2822,7 +2823,7 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
|
|||||||
#endif /* MDBX_PNL sort-order */
|
#endif /* MDBX_PNL sort-order */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Use new pages from the map when nothing suitable in the freeDB */
|
/* Use new pages from the map when nothing suitable in the GC */
|
||||||
repg_pos = 0;
|
repg_pos = 0;
|
||||||
pgno = txn->mt_next_pgno;
|
pgno = txn->mt_next_pgno;
|
||||||
rc = MDBX_MAP_FULL;
|
rc = MDBX_MAP_FULL;
|
||||||
@ -3978,7 +3979,7 @@ static __inline int mdbx_backlog_extragap(MDBX_env *env) {
|
|||||||
|
|
||||||
/* LY: Prepare a backlog of pages to modify FreeDB itself,
|
/* LY: Prepare a backlog of pages to modify FreeDB itself,
|
||||||
* while reclaiming is prohibited. It should be enough to prevent search
|
* while reclaiming is prohibited. It should be enough to prevent search
|
||||||
* in mdbx_page_alloc() during a deleting, when freeDB tree is unbalanced. */
|
* in mdbx_page_alloc() during a deleting, when GC tree is unbalanced. */
|
||||||
static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *mc) {
|
static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *mc) {
|
||||||
/* LY: extra page(s) for b-tree rebalancing */
|
/* LY: extra page(s) for b-tree rebalancing */
|
||||||
const int extra =
|
const int extra =
|
||||||
@ -4190,7 +4191,7 @@ retry:
|
|||||||
head_gc_id = MDBX_PNL_LAST(txn->mt_lifo_reclaimed);
|
head_gc_id = MDBX_PNL_LAST(txn->mt_lifo_reclaimed);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* If using records from freeDB which we have not yet deleted,
|
/* If using records from GC which we have not yet deleted,
|
||||||
* now delete them and any we reserved for me_reclaimed_pglist. */
|
* now delete them and any we reserved for me_reclaimed_pglist. */
|
||||||
while (cleaned_gc_id < env->me_last_reclaimed) {
|
while (cleaned_gc_id < env->me_last_reclaimed) {
|
||||||
rc = mdbx_cursor_first(&mc, &key, NULL);
|
rc = mdbx_cursor_first(&mc, &key, NULL);
|
||||||
@ -4356,14 +4357,14 @@ retry:
|
|||||||
// handle befree-list - store ones into single gc-record
|
// handle befree-list - store ones into single gc-record
|
||||||
if (befree_stored < MDBX_PNL_SIZE(txn->mt_befree_pages)) {
|
if (befree_stored < MDBX_PNL_SIZE(txn->mt_befree_pages)) {
|
||||||
if (unlikely(!befree_stored)) {
|
if (unlikely(!befree_stored)) {
|
||||||
/* Make sure last page of freeDB is touched and on befree-list */
|
/* Make sure last page of GC is touched and on befree-list */
|
||||||
mc.mc_flags &= ~C_RECLAIMING;
|
mc.mc_flags &= ~C_RECLAIMING;
|
||||||
rc = mdbx_page_search(&mc, NULL, MDBX_PS_LAST | MDBX_PS_MODIFY);
|
rc = mdbx_page_search(&mc, NULL, MDBX_PS_LAST | MDBX_PS_MODIFY);
|
||||||
mc.mc_flags |= C_RECLAIMING;
|
mc.mc_flags |= C_RECLAIMING;
|
||||||
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
|
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
/* Write to last page of freeDB */
|
/* Write to last page of GC */
|
||||||
key.iov_len = sizeof(txn->mt_txnid);
|
key.iov_len = sizeof(txn->mt_txnid);
|
||||||
key.iov_base = &txn->mt_txnid;
|
key.iov_base = &txn->mt_txnid;
|
||||||
do {
|
do {
|
||||||
@ -11973,7 +11974,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
|
|||||||
meta->mp_meta.mm_dbs[MAIN_DBI].md_flags =
|
meta->mp_meta.mm_dbs[MAIN_DBI].md_flags =
|
||||||
read_txn->mt_dbs[MAIN_DBI].md_flags;
|
read_txn->mt_dbs[MAIN_DBI].md_flags;
|
||||||
} else {
|
} else {
|
||||||
/* Count free pages + freeDB pages. Subtract from last_pg
|
/* Count free pages + GC pages. Subtract from last_pg
|
||||||
* to find the new last_pg, which also becomes the new root. */
|
* to find the new last_pg, which also becomes the new root. */
|
||||||
pgno_t freecount = 0;
|
pgno_t freecount = 0;
|
||||||
MDBX_cursor mc;
|
MDBX_cursor mc;
|
||||||
@ -14708,6 +14709,8 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
/* *INDENT-OFF* */
|
||||||
|
/* clang-format off */
|
||||||
|
|
||||||
__dll_export
|
__dll_export
|
||||||
#ifdef __attribute_used__
|
#ifdef __attribute_used__
|
||||||
@ -14778,3 +14781,6 @@ LIBMDBX_API __attribute__((__weak__)) const char *__asan_default_options() {
|
|||||||
"abort_on_error=1";
|
"abort_on_error=1";
|
||||||
}
|
}
|
||||||
#endif /* __SANITIZE_ADDRESS__ */
|
#endif /* __SANITIZE_ADDRESS__ */
|
||||||
|
|
||||||
|
/* *INDENT-ON* */
|
||||||
|
/* clang-format on */
|
||||||
|
@ -238,6 +238,9 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
|||||||
/* Number of DBs in metapage (free and main) - also hardcoded elsewhere */
|
/* Number of DBs in metapage (free and main) - also hardcoded elsewhere */
|
||||||
#define CORE_DBS 2
|
#define CORE_DBS 2
|
||||||
#define MAX_DBI (INT16_MAX - CORE_DBS)
|
#define MAX_DBI (INT16_MAX - CORE_DBS)
|
||||||
|
#if MAX_DBI != MDBX_MAX_DBI
|
||||||
|
#error "Opps, MAX_DBI != MDBX_MAX_DBI"
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Number of meta pages - also hardcoded elsewhere */
|
/* Number of meta pages - also hardcoded elsewhere */
|
||||||
#define NUM_METAS 3
|
#define NUM_METAS 3
|
||||||
|
Loading…
x
Reference in New Issue
Block a user