From e88adf39692ae084f177825e4c7449726dd6ccdb Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Tue, 21 Feb 2017 20:38:28 +0300 Subject: [PATCH] mdbx: preparation to rebirth. --- barriers.h | 6 +- intro.doc | 195 --------------------------------------------------- lmdb.h | 147 +++----------------------------------- mdb.c | 32 ++++----- mdb_chk.c | 4 +- reopen.h | 7 +- yota_test1.c | 4 +- yota_test2.c | 4 +- 8 files changed, 38 insertions(+), 361 deletions(-) delete mode 100644 intro.doc diff --git a/barriers.h b/barriers.h index 1e98730d..ff39cae2 100644 --- a/barriers.h +++ b/barriers.h @@ -14,11 +14,15 @@ /***************************************************************************** * Properly compiler/memory/coherence barriers - * in the most portable way for ReOpenMDBX project. + * in the most portable way for libmdbx project. * * Feedback and comments are welcome. * https://gist.github.com/leo-yuriev/ba186a6bf5cf3a27bae7 */ +#pragma once +/* *INDENT-OFF* */ +/* clang-format off */ + #if defined(__mips) && defined(__linux) /* Only MIPS has explicit cache control */ # include diff --git a/intro.doc b/intro.doc deleted file mode 100644 index 9462df18..00000000 --- a/intro.doc +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright 2015-2017 Leonid Yuriev . - * Copyright 2015-2017 Howard Chu, Symas Corp. - * Copyright 2015,2016 Peter-Service R&D LLC. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ - -/** @page starting Getting Started - -LMDB is compact, fast, powerful, and robust and implements a simplified -variant of the BerkeleyDB (BDB) API. (BDB is also very powerful, and verbosely -documented in its own right.) 
After reading this page, the main -\ref mdb documentation should make sense. Thanks to Bert Hubert -for creating the - -initial version of this writeup. - -Everything starts with an environment, created by #mdb_env_create(). -Once created, this environment must also be opened with #mdb_env_open(). - -#mdb_env_open() gets passed a name which is interpreted as a directory -path. Note that this directory must exist already, it is not created -for you. Within that directory, a lock file and a storage file will be -generated. If you don't want to use a directory, you can pass the -#MDB_NOSUBDIR option, in which case the path you provided is used -directly as the data file, and another file with a "-lock" suffix -added will be used for the lock file. - -Once the environment is open, a transaction can be created within it -using #mdb_txn_begin(). Transactions may be read-write or read-only, -and read-write transactions may be nested. A transaction must only -be used by one thread at a time. Transactions are always required, -even for read-only access. The transaction provides a consistent -view of the data. - -Once a transaction has been created, a database can be opened within it -using #mdb_dbi_open(). If only one database will ever be used in the -environment, a NULL can be passed as the database name. For named -databases, the #MDB_CREATE flag must be used to create the database -if it doesn't already exist. Also, #mdb_env_set_maxdbs() must be -called after #mdb_env_create() and before #mdb_env_open() to set the -maximum number of named databases you want to support. - -Note: a single transaction can open multiple databases. Generally -databases should only be opened once, by the first transaction in -the process. After the first transaction completes, the database -handles can freely be used by all subsequent transactions. 
- -Within a transaction, #mdb_get() can retrieve and #mdb_put() can store single -key/value pairs if that is all you need to do (but see \ref Cursors -below if you want to do more). - -A key/value pair is expressed as two #MDB_val structures. This struct -has two fields, \c mv_size and \c mv_data. The data is a \c void pointer to -an array of \c mv_size bytes. - -Because LMDB is very efficient (and usually zero-copy), the data returned -in an #MDB_val structure may be memory-mapped straight from disk. In -other words look but do not touch (or free() for that matter). -Once a transaction is closed, the values can no longer be used, so -make a copy if you need to keep them after that. - -@section Cursors Cursors - -To do more powerful things, we must use a cursor. - -Within the transaction, a cursor can be created with #mdb_cursor_open(). -With this cursor we can store/retrieve/delete (multiple) values using -#mdb_cursor_get(), #mdb_cursor_put(), and #mdb_cursor_del(). - -#mdb_cursor_get() positions itself depending on the cursor operation -requested, and for some operations, on the supplied key. For example, -to list all key/value pairs in a database, use operation #MDB_FIRST for -the first call to #mdb_cursor_get(), and #MDB_NEXT on subsequent calls, -until the end is hit. - -To retrieve all keys starting from a specified key value, use #MDB_SET. -For more cursor operations, see the \ref mdb docs. - -When using #mdb_cursor_put(), either the function will position the -cursor for you based on the \b key, or you can use operation -#MDB_CURRENT to use the current position of the cursor. Note that -\b key must then match the current position's key. - -@subsection summary Summarizing the Opening - -So we have a cursor in a transaction which opened a database in an -environment which is opened from a filesystem after it was -separately created. 
- -Or, we create an environment, open it from a filesystem, create a -transaction within it, open a database within that transaction, -and create a cursor within all of the above. - -Got it? - -@section thrproc Threads and Processes - -LMDB uses POSIX locks on files, and these locks have issues if one -process opens a file multiple times. Because of this, do not -#mdb_env_open() a file multiple times from a single process. Instead, -share the LMDB environment that has opened the file across all threads. -Otherwise, if a single process opens the same environment multiple times, -closing it once will remove all the locks held on it, and the other -instances will be vulnerable to corruption from other processes. - -Also note that a transaction is tied to one thread by default using -Thread Local Storage. If you want to pass read-only transactions across -threads, you can use the #MDB_NOTLS option on the environment. - -@section txns Transactions, Rollbacks, etc. - -To actually get anything done, a transaction must be committed using -#mdb_txn_commit(). Alternatively, all of a transaction's operations -can be discarded using #mdb_txn_abort(). In a read-only transaction, -any cursors will \b not automatically be freed. In a read-write -transaction, all cursors will be freed and must not be used again. - -For read-only transactions, obviously there is nothing to commit to -storage. The transaction still must eventually be aborted to close -any database handle(s) opened in it, or committed to keep the -database handles around for reuse in new transactions. - -In addition, as long as a transaction is open, a consistent view of -the database is kept alive, which requires storage. A read-only -transaction that no longer requires this consistent view should -be terminated (committed or aborted) when the view is no longer -needed (but see below for an optimization). - -There can be multiple simultaneously active read-only transactions -but only one that can write. 
Once a single read-write transaction -is opened, all further attempts to begin one will block until the -first one is committed or aborted. This has no effect on read-only -transactions, however, and they may continue to be opened at any time. - -@section dupkeys Duplicate Keys - -#mdb_get() and #mdb_put() respectively have no and only some support -for multiple key/value pairs with identical keys. If there are multiple -values for a key, #mdb_get() will only return the first value. - -When multiple values for one key are required, pass the #MDB_DUPSORT -flag to #mdb_dbi_open(). In an #MDB_DUPSORT database, by default -#mdb_put() will not replace the value for a key if the key existed -already. Instead it will add the new value to the key. In addition, -#mdb_del() will pay attention to the value field too, allowing for -specific values of a key to be deleted. - -Finally, additional cursor operations become available for -traversing through and retrieving duplicate values. - -@section optim Some Optimization - -If you frequently begin and abort read-only transactions, as an -optimization, it is possible to only reset and renew a transaction. - -#mdb_txn_reset() releases any old copies of data kept around for -a read-only transaction. To reuse this reset transaction, call -#mdb_txn_renew() on it. Any cursors in this transaction must also -be renewed using #mdb_cursor_renew(). - -Note that #mdb_txn_reset() is similar to #mdb_txn_abort() and will -close any databases you opened within the transaction. - -To permanently free a transaction, reset or not, use #mdb_txn_abort(). - -@section cleanup Cleaning Up - -For read-only transactions, any cursors created within it must -be closed using #mdb_cursor_close(). - -It is very rarely necessary to close a database handle, and in -general they should just be left open. 
- -@section onward The Full API - -The full \ref mdb documentation lists further details, like how to: - - \li size a database (the default limits are intentionally small) - \li drop and clean a database - \li detect and report errors - \li optimize (bulk) loading speed - \li (temporarily) reduce robustness to gain even more speed - \li gather statistics about the database - \li define custom sort orders - -*/ diff --git a/lmdb.h b/lmdb.h index 272ce965..48ca616f 100644 --- a/lmdb.h +++ b/lmdb.h @@ -1,141 +1,13 @@ -/** @file lmdb.h - * @brief Extended Lightning memory-mapped database library +/* + * Copyright 2015-2017 Leonid Yuriev . * - * @mainpage Extended Lightning Memory-Mapped Database (MDBX) - * - * @section intro_sec Introduction - * MDBX is a Btree-based database management library modeled loosely on the - * BerkeleyDB API, but much simplified. The entire database is exposed - * in a memory map, and all data fetches return data directly - * from the mapped memory, so no malloc's or memcpy's occur during - * data fetches. As such, the library is extremely simple because it - * requires no page caching layer of its own, and it is extremely high - * performance and memory-efficient. It is also fully transactional with - * full ACID semantics, and when the memory map is read-only, the - * database integrity cannot be corrupted by stray pointer writes from - * application code. - * - * The library is fully thread-aware and supports concurrent read/write - * access from multiple processes and threads. Data pages use a copy-on- - * write strategy so no active data pages are ever overwritten, which - * also provides resistance to corruption and eliminates the need of any - * special recovery procedures after a system crash. Writes are fully - * serialized; only one write transaction may be active at a time, which - * guarantees that writers can never deadlock. 
The database structure is - * multi-versioned so readers run with no locks; writers cannot block - * readers, and readers don't block writers. - * - * Unlike other well-known database mechanisms which use either write-ahead - * transaction logs or append-only data writes, MDBX requires no maintenance - * during operation. Both write-ahead loggers and append-only databases - * require periodic checkpointing and/or compaction of their log or database - * files otherwise they grow without bound. MDBX tracks free pages within - * the database and re-uses them for new write operations, so the database - * size does not grow without bound in normal use. - * - * The memory map can be used as a read-only or read-write map. It is - * read-only by default as this provides total immunity to corruption. - * Using read-write mode offers much higher write performance, but adds - * the possibility for stray application writes thru pointers to silently - * corrupt the database. Of course if your application code is known to - * be bug-free (...) then this is not an issue. - * - * If this is your first time using a transactional embedded key/value - * store, you may find the \ref starting page to be helpful. - * - * @section caveats_sec Caveats - * Troubleshooting the lock file: - * - * - A broken lockfile can cause sync issues. - * Stale reader transactions left behind by an aborted program - * cause further writes to grow the database quickly, and - * stale locks can block further operation. - * - * Fix: Check for stale readers periodically, using the - * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. - * Stale writers will be cleared automatically on Linux - * using POSIX mutexes with Robust option. - * Otherwise just make all programs using the database close it; - * the lockfile is always reset on first open of the environment. 
- * - * - * Restrictions/caveats (in addition to those listed for some functions): - * - * - An MDBX configuration will often reserve considerable \b unused - * memory address space and maybe file size for future growth. - * This does not use actual memory or disk space, but users may need - * to understand the difference so they won't be scared off. - * - * - An LMDB configuration will often reserve considerable \b unused - * memory address space and maybe file size for future growth. - * This does not use actual memory or disk space, but users may need - * to understand the difference so they won't be scared off. - * - * - By default, in versions before 0.9.10, unused portions of the data - * file might receive garbage data from memory freed by other code. - * (This does not happen when using the #MDB_WRITEMAP flag.) As of - * 0.9.10 the default behavior is to initialize such memory before - * writing to the data file. Since there may be a slight performance - * cost due to this initialization, applications may disable it using - * the #MDB_NOMEMINIT flag. Applications handling sensitive data - * which must not be written should not use this flag. This flag is - * irrelevant when using #MDB_WRITEMAP. - * - * - A thread can only use one transaction at a time, plus any child - * transactions. Each transaction belongs to one thread. See below. - * The #MDB_NOTLS flag changes this for read-only transactions. - * - * - Use an MDB_env* in the process which opened it, not after fork(). - * - * - Do not have open an MDBX database twice in the same process at - * the same time. Not even from a plain open() call - close()ing it - * breaks fcntl() advisory locking. (It is OK to reopen it after - * fork() - exec*(), since the lockfile has FD_CLOEXEC set.) - * - * - Avoid long-lived transactions. Read transactions prevent - * reuse of pages freed by newer write transactions, thus the - * database can grow quickly. 
Write transactions prevent - * other write transactions, since writes are serialized. - * - * - Avoid suspending a process with active transactions. These - * would then be "long-lived" as above. Also read transactions - * suspended when writers commit could sometimes see wrong data. - * - * ...when several processes can use a database concurrently: - * - * - Avoid aborting a process with an active transaction. - * The transaction becomes "long-lived" as above until a check - * for stale readers is performed or the lockfile is reset, - * since the process may not remove it from the lockfile. - * - * This does not apply to write transactions if the system clears - * stale writers, see above. - * - * - If you do that anyway, do a periodic check for stale readers. Or - * close the environment once in a while, so the lockfile can get reset. - * - * - Do not use MDBX databases on remote filesystems, even between - * processes on the same host. This breaks flock() on some OSes, - * possibly memory map sync, and certainly sync between programs - * on different hosts. - * - * - Opening a database can fail if another process is opening or - * closing it at exactly the same time. - * - * @author Leonid Yuriev, 'ReOpen' initiative . - * Howard Chu, Symas Corp. All rights reserved. - * - * @copyright 2015-2017 Leonid Yuriev . - * 2011-2017 Howard Chu, Symas Corp. All rights reserved. + * This code is derived from "LMDB engine" written by + * Howard Chu (Symas Corporation), which itself derived from btree.c + * written by Martin Hedenfalk. * * --- * - * Copyright 2015-2017 Leonid Yuriev . - * Copyright 2015,2016 Peter-Service R&D LLC. - * @par Derived From: - * This code is derived from LMDB engine written by Howard Chu, Symas Corporation. - * - * Copyright 2011-2017 Howard Chu, Symas Corp. All rights reserved. + * Portions Copyright 2011-2017 Howard Chu, Symas Corp. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP @@ -145,10 +17,9 @@ * top-level directory of the distribution or, alternatively, at * . * - * @par Derived From: - * This code is derived from btree.c written by Martin Hedenfalk. + * --- * - * Copyright (c) 2009, 2010 Martin Hedenfalk + * Portions Copyright (c) 2009, 2010 Martin Hedenfalk * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -303,7 +174,7 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel /** tie reader locktable slots to #MDB_txn objects instead of to threads */ #define MDB_NOTLS 0x200000 /** don't do any locking, caller must manage their own locks - * WARNING: ReOpenMDBX don't support this mode. */ + * WARNING: libmdbx doesn't support this mode. */ #define MDB_NOLOCK__UNSUPPORTED 0x400000 /** don't do readahead */ #define MDB_NORDAHEAD 0x800000 diff --git a/mdb.c b/mdb.c index 06ec817b..c26cf0d1 100644 --- a/mdb.c +++ b/mdb.c @@ -1,15 +1,13 @@ -/** @file mdb.c - * @brief Lightning memory-mapped database library - * - * A Btree-based database management library modeled loosely on the - * BerkeleyDB API, but much simplified. - */ - /* * Copyright 2015-2017 Leonid Yuriev . - * Copyright 2011-2017 Howard Chu, Symas Corp. - * Copyright 2015,2016 Peter-Service R&D LLC. - * All rights reserved. + * + * This code is derived from "LMDB engine" written by + * Howard Chu (Symas Corporation), which is itself derived from btree.c + * written by Martin Hedenfalk. + * + * --- + * + * Portions Copyright 2011-2017 Howard Chu, Symas Corp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP @@ -19,9 +17,9 @@ * top-level directory of the distribution or, alternatively, at * .
* - * This code is derived from btree.c written by Martin Hedenfalk. + * --- * - * Copyright (c) 2009, 2010 Martin Hedenfalk + * Portions Copyright (c) 2009, 2010 Martin Hedenfalk * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -47,27 +45,27 @@ /* LY: Please do not ask us for Windows support, just never! * But you can make a fork for Windows, or become maintainer for FreeBSD... */ #ifndef __gnu_linux__ -# warning "ReOpenMDBX supports only GNU Linux" +# warning "libmdbx supports only GNU Linux" #endif #include #if !defined(__GNUC__) || !__GNUC_PREREQ(4,2) - /* LY: Actualy ReOpenMDBX was not tested with compilers + /* LY: Actually libmdbx was not tested with compilers * older than GCC 4.4 (from RHEL6). * But you could remove this #error and try to continue at your own risk. * In such case please don't rise up an issues related ONLY to old compilers. */ -# warning "ReOpenMDBX required at least GCC 4.2 compatible C/C++ compiler." +# warning "libmdbx required at least GCC 4.2 compatible C/C++ compiler." #endif #if !defined(__GNU_LIBRARY__) || !__GLIBC_PREREQ(2,12) - /* LY: Actualy ReOpenMDBX was not tested with something + /* LY: Actually libmdbx was not tested with something * older than glibc 2.12 (from RHEL6). * But you could remove this #error and try to continue at your own risk. * In such case please don't rise up an issues related ONLY to old systems. */ -# warning "ReOpenMDBX required at least GLIBC 2.12." +# warning "libmdbx required at least GLIBC 2.12." #endif #if MDB_DEBUG diff --git a/mdb_chk.c b/mdb_chk.c index 1422eea1..db141b4b 100644 --- a/mdb_chk.c +++ b/mdb_chk.c @@ -6,12 +6,12 @@ * * This file is part of libmdbx.
* - * ReOpenMDBX is free software; you can redistribute it and/or modify it under + * libmdbx is free software; you can redistribute it and/or modify it under * the terms of the GNU Affero General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * - * ReOpenMDBX is distributed in the hope that it will be useful, + * libmdbx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. diff --git a/reopen.h b/reopen.h index dd214172..ee828b94 100644 --- a/reopen.h +++ b/reopen.h @@ -12,8 +12,9 @@ * . */ -#ifndef _REOPEN_H -#define _REOPEN_H +#pragma once +/* *INDENT-OFF* */ +/* clang-format off */ #ifndef __CLANG_PREREQ # ifdef __clang__ @@ -233,5 +234,3 @@ __extern_C void __assert_fail( ((void)(addr), (void)(size)) # define ATTRIBUTE_NO_SANITIZE_ADDRESS #endif /* __SANITIZE_ADDRESS__ */ - -#endif /* _REOPEN_H */ diff --git a/yota_test1.c b/yota_test1.c index be727cbf..0cad5468 100644 --- a/yota_test1.c +++ b/yota_test1.c @@ -4,12 +4,12 @@ * * This file is part of libmdbx. * - * ReOpenMDBX is free software; you can redistribute it and/or modify it under + * libmdbx is free software; you can redistribute it and/or modify it under * the terms of the GNU Affero General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * - * ReOpenMDBX is distributed in the hope that it will be useful, + * libmdbx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. 
diff --git a/yota_test2.c b/yota_test2.c index 753bea2f..80dc4f2f 100644 --- a/yota_test2.c +++ b/yota_test2.c @@ -4,12 +4,12 @@ * * This file is part of libmdbx. * - * ReOpenMDBX is free software; you can redistribute it and/or modify it under + * libmdbx is free software; you can redistribute it and/or modify it under * the terms of the GNU Affero General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * - * ReOpenMDBX is distributed in the hope that it will be useful, + * libmdbx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details.