mdbx: drop/deprecate MDBX_MAPASYNC.

Change-Id: I472f97f568a32325eb056c8ee4d2f2350a473bda
This commit is contained in:
Leonid Yuriev 2020-08-01 19:13:17 +03:00
parent 135bead730
commit 5e43ee61a2
11 changed files with 182 additions and 214 deletions

View File

@ -155,7 +155,7 @@ check: test dist
test: build-test test: build-test
rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \
(./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=$(TEST_ITER) --pathname=$(TEST_DB) --dont-cleanup-after basic && \ (./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=$(TEST_ITER) --pathname=$(TEST_DB) --dont-cleanup-after basic && \
./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=12 --pathname=$(TEST_DB) --dont-cleanup-after basic) \ ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=12 --pathname=$(TEST_DB) --dont-cleanup-after basic) \
| tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \
&& ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy && ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy
@ -163,7 +163,7 @@ test-singleprocess: all mdbx_test
rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \
(./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \ (./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \
./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ ./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \
./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --nested) \ ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --nested) \
| tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \
&& ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy && ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy
@ -178,7 +178,7 @@ memcheck test-valgrind:
rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ( \ rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ( \
$(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after basic && \
$(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ $(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \
$(VALGRIND) ./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ $(VALGRIND) ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after basic && \
$(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \ $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \
$(VALGRIND) ./mdbx_chk -vvn $(TEST_DB)-copy \ $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB)-copy \
) | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) ) | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42)

View File

@ -227,7 +227,7 @@ the user's point of view.
7. Fast estimation of range query result volume, i.e. how many items can 7. Fast estimation of range query result volume, i.e. how many items can
be found between a `KEY1` and a `KEY2`. This is a prerequisite for build be found between a `KEY1` and a `KEY2`. This is a prerequisite for build
and/or optimize query execution plans. and/or optimize query execution plans.
> _libmdbx_ performs a rough estimate based on common B-tree pages of the paths from root to corresponding keys. > _libmdbx_ performs a rough estimate based on common B-tree pages of the paths from root to corresponding keys.
8. `mdbx_chk` tool for database integrity check. 8. `mdbx_chk` tool for database integrity check.
@ -259,9 +259,13 @@ pair, to the first, to the last, or not set to anything.
## Other fixes and specifics ## Other fixes and specifics
1. Fixed more than 10 significant errors, in particular: page leaks, wrong sub-database statistics, segfault in several conditions, nonoptimal page merge strategy, updating an existing record with a change in data size (including for multimap), etc. 1. Fixed more than 10 significant errors, in particular: page leaks,
wrong sub-database statistics, segfault in several conditions,
nonoptimal page merge strategy, updating an existing record with
a change in data size (including for multimap), etc.
2. All cursors can be reused and should be closed explicitly, regardless ones were opened within a write or read transaction. 2. All cursors can be reused and should be closed explicitly,
regardless ones were opened within a write or read transaction.
3. Opening database handles are spared from race conditions and 3. Opening database handles are spared from race conditions and
pre-opening is not needed. pre-opening is not needed.
@ -269,10 +273,9 @@ pre-opening is not needed.
4. Returning `MDBX_EMULTIVAL` error in case of ambiguous update or delete. 4. Returning `MDBX_EMULTIVAL` error in case of ambiguous update or delete.
5. Guarantee of database integrity even in asynchronous unordered write-to-disk mode. 5. Guarantee of database integrity even in asynchronous unordered write-to-disk mode.
> _libmdbx_ propose additional trade-off by implementing append-like manner for updates > _libmdbx_ propose additional trade-off by `MDBX_SAFE_NOSYNC` with append-like manner for updates,
> in `MDBX_SAFE_NOSYNC` and `MDBX_WRITEMAP|MDBX_MAPASYNC` modes, that avoid database corruption after a system crash > that avoids database corruption after a system crash contrary to LMDB.
> contrary to LMDB. Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode is available to match LMDB behaviour, > Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode is available to match behaviour of the `MDB_NOSYNC` in LMDB.
> and for special use-cases.
6. On **MacOS & iOS** the `fcntl(F_FULLFSYNC)` syscall is used _by 6. On **MacOS & iOS** the `fcntl(F_FULLFSYNC)` syscall is used _by
default_ to synchronize data with the disk, as this is [the only way to default_ to synchronize data with the disk, as this is [the only way to

115
mdbx.h
View File

@ -809,13 +809,12 @@ enum MDBX_env_flags_t {
* series of write transactions, will be as small as possible. Thus creates * series of write transactions, will be as small as possible. Thus creates
* ideal conditions for the efficient operation of the disk write-back cache. * ideal conditions for the efficient operation of the disk write-back cache.
* *
* \ref MDBX_LIFORECLAIM is compatible with all no-sync flags (i.e. * \ref MDBX_LIFORECLAIM is compatible with all no-sync flags, but gives NO
* \ref MDBX_NOMETASYNC, \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC, \ref * noticeable impact in combination with \ref MDBX_SAFE_NOSYNC or
* MDBX_MAPASYNC), but gives no noticeable impact in combination with \ref * \ref MDBX_UTTERLY_NOSYNC. Because MDBX will reused pages only before the
* MDBX_SAFE_NOSYNC. Because MDBX will reused pages only before the last * last "steady" MVCC-snapshot, i.e. the loop length of database pages
* "steady" MVCC-snapshot, i.e. the loop length of database pages circulation * circulation will be mostly defined by frequency of calling
* will be mostly defined by frequency of calling `mdbx_env_sync()` rather * `mdbx_env_sync()` rather than LIFO and FIFO difference.
* than LIFO and FIFO difference.
* *
* This flag may be changed at any time using mdbx_env_set_flags(). */ * This flag may be changed at any time using mdbx_env_set_flags(). */
MDBX_LIFORECLAIM = UINT32_C(0x4000000), MDBX_LIFORECLAIM = UINT32_C(0x4000000),
@ -827,9 +826,9 @@ enum MDBX_env_flags_t {
/** \defgroup sync_modes SYNC MODES /** \defgroup sync_modes SYNC MODES
* *
* \attention Using any combination of \ref MDBX_SAFE_NOSYNC, \ref * \attention Using any combination of \ref MDBX_SAFE_NOSYNC, \ref
* MDBX_NOMETASYNC, \ref MDBX_MAPASYNC and especially \ref MDBX_UTTERLY_NOSYNC * MDBX_NOMETASYNC and especially \ref MDBX_UTTERLY_NOSYNC is always a deal to
* is always a deal to reduce durability for gain write performance. You must * reduce durability for gain write performance. You must know exactly what
* know exactly what you are doing and what risks you are taking! * you are doing and what risks you are taking!
* *
* \note for LMDB users: \ref MDBX_SAFE_NOSYNC is NOT similar to LMDB_NOSYNC, * \note for LMDB users: \ref MDBX_SAFE_NOSYNC is NOT similar to LMDB_NOSYNC,
* but \ref MDBX_UTTERLY_NOSYNC is exactly match LMDB_NOSYNC. See details * but \ref MDBX_UTTERLY_NOSYNC is exactly match LMDB_NOSYNC. See details
@ -863,7 +862,6 @@ enum MDBX_env_flags_t {
* *
* \see MDBX_NOMETASYNC * \see MDBX_NOMETASYNC
* \see MDBX_SAFE_NOSYNC * \see MDBX_SAFE_NOSYNC
* \see MDBX_MAPASYNC
* \see MDBX_UTTERLY_NOSYNC * \see MDBX_UTTERLY_NOSYNC
* *
* @{ */ * @{ */
@ -893,13 +891,14 @@ enum MDBX_env_flags_t {
* huge difference in how are recycled the MVCC snapshots corresponding to * huge difference in how are recycled the MVCC snapshots corresponding to
* previous "steady" transactions (see below). * previous "steady" transactions (see below).
* *
* With \ref MDBX_WRITEMAP the `MDBX_SAFE_NOSYNC` instructs MDBX to use
* asynchronous mmap-flushes to disk. Asynchronous mmap-flushes mean that
* all writes will actually be scheduled and performed by the operating system
* in its own manner, i.e. unordered. MDBX itself just notifies the operating
* system that it would be nice to write data to disk, but no more.
*
* Depending on the platform and hardware, with `MDBX_SAFE_NOSYNC` you may get * Depending on the platform and hardware, with `MDBX_SAFE_NOSYNC` you may get
* a multiple increase of write performance, even 10 times or more. \note Note * a multiple increase of write performance, even 10 times or more.
* that (`MDBX_SAFE_NOSYNC` | \ref MDBX_WRITEMAP) leaves the system with no
* hint for when to write transactions to disk. Therefore the
* (\ref MDBX_MAPASYNC | \ref MDBX_WRITEMAP) may be preferable, but without
* `MDBX_SAFE_NOSYNC` because the (\ref MDBX_MAPASYNC | `MDBX_SAFE_NOSYNC`)
* actually gives \ref MDBX_UTTERLY_NOSYNC.
* *
* In contrast to \ref MDBX_UTTERLY_NOSYNC mode, with `MDBX_SAFE_NOSYNC` flag * In contrast to \ref MDBX_UTTERLY_NOSYNC mode, with `MDBX_SAFE_NOSYNC` flag
* MDBX will keeps untouched pages within B-tree of the last transaction * MDBX will keeps untouched pages within B-tree of the last transaction
@ -933,43 +932,15 @@ enum MDBX_env_flags_t {
* *
* `MDBX_SAFE_NOSYNC` flag may be changed at any time using * `MDBX_SAFE_NOSYNC` flag may be changed at any time using
* \ref mdbx_env_set_flags() or by passing to \ref mdbx_txn_begin() for * \ref mdbx_env_set_flags() or by passing to \ref mdbx_txn_begin() for
* particular write transaction. * particular write transaction. */
*
* \warning don't combine this flag with \ref MDBX_MAPASYNC since you will got
* \ref MDBX_UTTERLY_NOSYNC in that way. \see sync_modes */
MDBX_SAFE_NOSYNC = UINT32_C(0x10000), MDBX_SAFE_NOSYNC = UINT32_C(0x10000),
/** Use asynchronous msync when \ref MDBX_WRITEMAP is used. /** \deprecated Please use \ref MDBX_SAFE_NOSYNC instead of `MDBX_MAPASYNC`.
* *
* `MDBX_MAPASYNC` meaningful and give effect only in conjunction * Since version 0.9.x the `MDBX_MAPASYNC` is deprecated and has the same
* with `MDBX_WRITEMAP` or `MDBX_SAFE_NOSYNC`: * effect as \ref MDBX_SAFE_NOSYNC with \ref MDBX_WRITEMAP. This just API
* - with \ref MDBX_SAFE_NOSYNC actually gives \ref MDBX_UTTERLY_NOSYNC, * simplification is for convenience and clarity. */
* which wipe previous steady commits for reuse pages as described above. MDBX_MAPASYNC = MDBX_SAFE_NOSYNC,
* - with \ref MDBX_WRITEMAP but without \ref MDBX_SAFE_NOSYNC instructs MDBX
* to use asynchronous mmap-flushes to disk as described below.
* - with both \ref MDBX_WRITEMAP and \ref MDBX_SAFE_NOSYNC you get the both
* effects.
*
* Asynchronous mmap-flushes means that actually all writes will scheduled and
* performed by operation system on it own manner, i.e. unordered. MDBX itself
* just notify operating system that it would be nice to write data to disk,
* but no more.
*
* With \ref MDBX_MAPASYNC flag, but without \ref MDBX_UTTERLY_NOSYNC (i.e.
* without OR'ing with \ref MDBX_SAFE_NOSYNC) MDBX will keeps untouched pages
* within B-tree of the last transaction "steady" which was synced to disk
* completely. So, this makes exactly the same "long-lived" impact and the
* same consequences as described above for \ref MDBX_SAFE_NOSYNC flag.
*
* Depending on the platform and hardware, with combination of
* \ref MDBX_WRITEMAP and \ref MDBX_MAPASYNC you may get a multiple increase
* of write performance, even 10-100 times or more. \ref MDBX_MAPASYNC flag
* may be changed at any time using \ref mdbx_env_set_flags() or by passing to
* \ref mdbx_txn_begin() for particular write transaction.
*
* \warning don't combine this flag with \ref MDBX_SAFE_NOSYNC since you will
* got \ref MDBX_UTTERLY_NOSYNC in that way. \see sync_modes */
MDBX_MAPASYNC = UINT32_C(0x100000),
/** Don't sync anything and wipe previous steady commits. /** Don't sync anything and wipe previous steady commits.
* *
@ -1012,7 +983,7 @@ enum MDBX_env_flags_t {
* `MDBX_UTTERLY_NOSYNC` flag may be changed at any time using * `MDBX_UTTERLY_NOSYNC` flag may be changed at any time using
* \ref mdbx_env_set_flags(), but don't has effect if passed to * \ref mdbx_env_set_flags(), but don't has effect if passed to
* \ref mdbx_txn_begin() for particular write transaction. \see sync_modes */ * \ref mdbx_txn_begin() for particular write transaction. \see sync_modes */
MDBX_UTTERLY_NOSYNC = MDBX_SAFE_NOSYNC | MDBX_MAPASYNC, MDBX_UTTERLY_NOSYNC = MDBX_SAFE_NOSYNC | UINT32_C(0x100000),
/** @} end of SYNC MODES */ /** @} end of SYNC MODES */
@ -1459,14 +1430,14 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* \ref MDBX_NOMEMINIT, \ref MDBX_COALESCE, \ref MDBX_LIFORECLAIM. * \ref MDBX_NOMEMINIT, \ref MDBX_COALESCE, \ref MDBX_LIFORECLAIM.
* See \ref env_flags section. * See \ref env_flags section.
* *
* - \ref MDBX_NOMETASYNC, \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC, * - \ref MDBX_NOMETASYNC, \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC.
* \ref MDBX_MAPASYNC. See \ref sync_modes section. * See \ref sync_modes section.
* *
* \note `MDB_NOLOCK` flag don't supported by MDBX, * \note `MDB_NOLOCK` flag don't supported by MDBX,
* try use \ref MDBX_EXCLUSIVE as a replacement. * try use \ref MDBX_EXCLUSIVE as a replacement.
* *
* \note MDBX don't allow to mix processes with different \ref MDBX_SAFE_NOSYNC, * \note MDBX don't allow to mix processes with different \ref MDBX_SAFE_NOSYNC
* \ref MDBX_MAPASYNC flags on the same environment. * flags on the same environment.
* In such case \ref MDBX_INCOMPATIBLE will be returned. * In such case \ref MDBX_INCOMPATIBLE will be returned.
* *
* If the database is already exist and parameters specified early by * If the database is already exist and parameters specified early by
@ -1493,7 +1464,7 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* more than once. * more than once.
* \retval MDBX_INCOMPATIBLE Environment is already opened by another process, * \retval MDBX_INCOMPATIBLE Environment is already opened by another process,
* but with different set of \ref MDBX_SAFE_NOSYNC, * but with different set of \ref MDBX_SAFE_NOSYNC,
* \ref MDBX_MAPASYNC flags. * \ref MDBX_UTTERLY_NOSYNC flags.
* Or if the database is already exist and parameters * Or if the database is already exist and parameters
* specified early by \ref mdbx_env_set_geometry() * specified early by \ref mdbx_env_set_geometry()
* are incompatible (i.e. different pagesize, etc). * are incompatible (i.e. different pagesize, etc).
@ -1697,7 +1668,7 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_env_info(MDBX_env *env, MDBX_envinfo *info,
* \ingroup c_extra * \ingroup c_extra
* *
* Unless the environment was opened with no-sync flags (\ref MDBX_NOMETASYNC, * Unless the environment was opened with no-sync flags (\ref MDBX_NOMETASYNC,
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC and \ref MDBX_MAPASYNC), then * \ref MDBX_SAFE_NOSYNC and \ref MDBX_UTTERLY_NOSYNC), then
* data is always written and flushed to disk when \ref mdbx_txn_commit() is * data is always written and flushed to disk when \ref mdbx_txn_commit() is
* called. Otherwise \ref mdbx_env_sync() may be called to manually write and * called. Otherwise \ref mdbx_env_sync() may be called to manually write and
* flush unsynced data to disk. * flush unsynced data to disk.
@ -1741,18 +1712,18 @@ LIBMDBX_API int mdbx_env_sync(MDBX_env *env);
LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env); LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
/** Sets threshold to force flush the data buffers to disk, even any of /** Sets threshold to force flush the data buffers to disk, even any of
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC and \ref MDBX_MAPASYNC flags in * \ref MDBX_SAFE_NOSYNC flag in the environment.
* the environment.
* \ingroup c_settings * \ingroup c_settings
* *
* The threshold value affects all processes which operates with given * The threshold value affects all processes which operates with given
* environment until the last process close environment or a new value will be * environment until the last process close environment or a new value will be
* settled. * settled.
* *
* Data is always written to disk when \ref mdbx_txn_commit() is called, but * Data is always written to disk when \ref mdbx_txn_commit() is called, but
* the operating system may keep it buffered. MDBX always flushes the OS buffers * the operating system may keep it buffered. MDBX always flushes the OS buffers
* upon commit as well, unless the environment was opened with * upon commit as well, unless the environment was opened with
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_MAPASYNC or in part \ref MDBX_NOMETASYNC. * \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC
* or in part \ref MDBX_NOMETASYNC.
* *
* The default is 0, which means no threshold is checked, and no additional * The default is 0, which means no threshold is checked, and no additional
* flush will be made. * flush will be made.
@ -1765,8 +1736,7 @@ LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold); LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold);
/** Sets relative period since the last unsteady commit to force flush the data /** Sets relative period since the last unsteady commit to force flush the data
* buffers to disk, even any of \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC and * buffers to disk, even of \ref MDBX_SAFE_NOSYNC flag in the environment.
* \ref MDBX_MAPASYNC flags in the environment.
* \ingroup c_settings * \ingroup c_settings
* *
* The relative period value affects all processes which operates with given * The relative period value affects all processes which operates with given
@ -1776,7 +1746,7 @@ LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold);
* Data is always written to disk when \ref mdbx_txn_commit() is called, but the * Data is always written to disk when \ref mdbx_txn_commit() is called, but the
* operating system may keep it buffered. MDBX always flushes the OS buffers * operating system may keep it buffered. MDBX always flushes the OS buffers
* upon commit as well, unless the environment was opened with * upon commit as well, unless the environment was opened with
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_MAPASYNC or in part \ref MDBX_NOMETASYNC. * \ref MDBX_SAFE_NOSYNC or in part \ref MDBX_NOMETASYNC.
* *
* Settled period don't checked asynchronously, but only by the * Settled period don't checked asynchronously, but only by the
* \ref mdbx_txn_commit() and \ref mdbx_env_sync() functions. Therefore, in * \ref mdbx_txn_commit() and \ref mdbx_env_sync() functions. Therefore, in
@ -1809,12 +1779,12 @@ LIBMDBX_API int mdbx_env_set_syncperiod(MDBX_env *env,
* \ref mdbx_env_create(). * \ref mdbx_env_create().
* *
* \param [in] dont_sync A dont'sync flag, if non-zero the last checkpoint * \param [in] dont_sync A dont'sync flag, if non-zero the last checkpoint
* will be kept "as is" and may be still "weak" in the * will be kept "as is" and may be still "weak" in the
* \ref MDBX_UTTERLY_NOSYNC or \ref MDBX_MAPASYNC modes. * \ref MDBX_SAFE_NOSYNC or \ref MDBX_UTTERLY_NOSYNC
* Such "weak" checkpoint will be ignored on opening next * modes. Such "weak" checkpoint will be ignored on
* time, and transactions since the last non-weak * opening next time, and transactions since the last
* checkpoint (meta-page update) will rolledback for * non-weak checkpoint (meta-page update) will rolledback
* consistency guarantee. * for consistency guarantee.
* *
* \returns A non-zero error value on failure and 0 on success, * \returns A non-zero error value on failure and 0 on success,
* some possible errors are: * some possible errors are:
@ -2294,8 +2264,7 @@ LIBMDBX_API void *mdbx_env_get_userctx(const MDBX_env *env);
* - \ref MDBX_TRYTXN Do not block when starting * - \ref MDBX_TRYTXN Do not block when starting
* a write transaction. * a write transaction.
* *
* - \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC or * - \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC.
* \ref MDBX_MAPASYNC.
* Do not sync data to disk corresponding * Do not sync data to disk corresponding
* to \ref MDBX_NOMETASYNC or \ref MDBX_SAFE_NOSYNC * to \ref MDBX_NOMETASYNC or \ref MDBX_SAFE_NOSYNC
* description. \see sync_modes. * description. \see sync_modes.

View File

@ -4922,7 +4922,7 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER)) SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER))
err = errno; err = errno;
#else #else
err = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA); err = mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA);
#endif #endif
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
@ -5646,7 +5646,7 @@ __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
if (outside_txn) { if (outside_txn) {
if (unsynced_pages > /* FIXME: define threshold */ 16 && if (unsynced_pages > /* FIXME: define threshold */ 16 &&
(flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0) { (flags & MDBX_SAFE_NOSYNC) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
const size_t usedbytes = pgno_align2os_bytes(env, head->mm_geo.next); const size_t usedbytes = pgno_align2os_bytes(env, head->mm_geo.next);
@ -5655,7 +5655,7 @@ __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
/* LY: pre-sync without holding lock to reduce latency for writer(s) */ /* LY: pre-sync without holding lock to reduce latency for writer(s) */
int err = (flags & MDBX_WRITEMAP) int err = (flags & MDBX_WRITEMAP)
? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes, false) ? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes, false)
: mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA); : mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
@ -5673,7 +5673,7 @@ __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
} }
if (!META_IS_STEADY(head) || if (!META_IS_STEADY(head) ||
((flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) { ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) {
mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO, mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO,
data_page(head)->mp_pgno, mdbx_durable_str(head), data_page(head)->mp_pgno, mdbx_durable_str(head),
unsynced_pages); unsynced_pages);
@ -5694,11 +5694,10 @@ fastpath:
if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_NOMETASYNC) != 0) { if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_NOMETASYNC) != 0) {
const txnid_t head_txnid = mdbx_recent_committed_txnid(env); const txnid_t head_txnid = mdbx_recent_committed_txnid(env);
if (*env->me_meta_sync_txnid != (uint32_t)head_txnid) { if (*env->me_meta_sync_txnid != (uint32_t)head_txnid) {
rc = rc = (flags & MDBX_WRITEMAP)
(flags & MDBX_WRITEMAP) ? mdbx_msync(&env->me_dxb_mmap, 0,
? mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), false)
pgno_align2os_bytes(env, NUM_METAS), false) : mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
: mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
*env->me_meta_sync_txnid = (uint32_t)head_txnid; *env->me_meta_sync_txnid = (uint32_t)head_txnid;
} }
@ -6480,7 +6479,7 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
mdbx_assert(env, mdbx_assert(env,
(txn->mt_flags & ~(MDBX_NOTLS | MDBX_RDONLY | MDBX_WRITEMAP | (txn->mt_flags & ~(MDBX_NOTLS | MDBX_RDONLY | MDBX_WRITEMAP |
MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0); MDBX_SAFE_NOSYNC)) == 0);
txn->mt_signature = MDBX_MT_SIGNATURE; txn->mt_signature = MDBX_MT_SIGNATURE;
*ret = txn; *ret = txn;
mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO
@ -8584,7 +8583,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0); mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0);
mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now); mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now);
if (flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) { if (flags & MDBX_SAFE_NOSYNC) {
/* Check auto-sync conditions */ /* Check auto-sync conditions */
const pgno_t autosync_threshold = *env->me_autosync_threshold; const pgno_t autosync_threshold = *env->me_autosync_threshold;
const uint64_t autosync_period = *env->me_autosync_period; const uint64_t autosync_period = *env->me_autosync_period;
@ -8681,38 +8680,27 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
} }
/* LY: step#1 - sync previously written/updated data-pages */ /* LY: step#1 - sync previously written/updated data-pages */
int rc = *env->me_unsynced_pages ? MDBX_RESULT_TRUE /* carry non-steady */ int rc = MDBX_RESULT_FALSE /* carry steady */;
: MDBX_RESULT_FALSE /* carry steady */; if (*env->me_unsynced_pages) {
if (rc != MDBX_RESULT_FALSE && (flags & MDBX_SAFE_NOSYNC) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
MDBX_meta *const recent_steady_meta = mdbx_meta_steady(env); enum mdbx_syncmode_bits mode_bits = MDBX_SYNC_NONE;
if (flags & MDBX_WRITEMAP) { if ((flags & MDBX_SAFE_NOSYNC) == 0) {
const size_t begin = mode_bits = MDBX_SYNC_DATA;
floor_powerof2(pgno2bytes(env, NUM_METAS), env->me_os_psize); if (pending->mm_geo.next > mdbx_meta_steady(env)->mm_geo.now)
const size_t end = pgno_align2os_bytes(env, pending->mm_geo.next); mode_bits |= MDBX_SYNC_SIZE;
if (end > begin) { if (flags & MDBX_NOMETASYNC)
rc = mdbx_msync(&env->me_dxb_mmap, begin, end - begin, mode_bits |= MDBX_SYNC_IODQ;
flags & MDBX_MAPASYNC);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
rc = MDBX_RESULT_TRUE /* carry non-steady */;
if ((flags & MDBX_MAPASYNC) == 0) {
if (unlikely(pending->mm_geo.next > recent_steady_meta->mm_geo.now)) {
rc = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_SIZE);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
rc = MDBX_RESULT_FALSE /* carry steady */;
}
} else {
rc = mdbx_filesync(env->me_lazy_fd,
(pending->mm_geo.next > recent_steady_meta->mm_geo.now)
? MDBX_SYNC_DATA | MDBX_SYNC_SIZE
: MDBX_SYNC_DATA);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
} }
if (flags & MDBX_WRITEMAP)
rc =
mdbx_msync(&env->me_dxb_mmap, 0,
pgno_align2os_bytes(env, pending->mm_geo.next), mode_bits);
else
rc = mdbx_fsync(env->me_lazy_fd, mode_bits);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
rc = (flags & MDBX_SAFE_NOSYNC) ? MDBX_RESULT_TRUE /* carry non-steady */
: MDBX_RESULT_FALSE /* carry steady */;
} }
/* Steady or Weak */ /* Steady or Weak */
@ -8825,24 +8813,13 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
target->mm_datasync_sign = pending->mm_datasync_sign; target->mm_datasync_sign = pending->mm_datasync_sign;
mdbx_flush_incoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
if ((flags & MDBX_SAFE_NOSYNC) == 0) { /* sync meta-pages */
/* sync meta-pages */ rc =
const bool weak = (flags & (MDBX_MAPASYNC | MDBX_NOMETASYNC)) != 0; mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
rc = mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), (flags & MDBX_NOMETASYNC) ? MDBX_SYNC_NONE
weak); : MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
if (!weak) {
#if defined(__APPLE__) && \
MDBX_OSX_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY
rc = likely(fcntl(env->me_lazy_fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS
: errno;
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
#endif /* MacOS */
*env->me_meta_sync_txnid = pending->mm_txnid_a.low;
}
}
} else { } else {
const MDBX_meta undo_meta = *target; const MDBX_meta undo_meta = *target;
const mdbx_filehandle_t fd = (env->me_dsync_fd != INVALID_HANDLE_VALUE) const mdbx_filehandle_t fd = (env->me_dsync_fd != INVALID_HANDLE_VALUE)
@ -8860,16 +8837,17 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
goto fail; goto fail;
} }
mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize); mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize);
if ((flags & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC)) == 0) { /* sync meta-pages */
/* sync meta-pages */ if ((flags & MDBX_NOMETASYNC) == 0 && fd == env->me_lazy_fd) {
if (fd == env->me_lazy_fd) { rc = mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
rc = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); if (rc != MDBX_SUCCESS)
if (rc != MDBX_SUCCESS) goto undo;
goto undo;
}
*env->me_meta_sync_txnid = pending->mm_txnid_a.low;
} }
} }
if (flags & MDBX_NOMETASYNC)
*env->me_unsynced_pages += 1;
else
*env->me_meta_sync_txnid = pending->mm_txnid_a.low;
/* LY: shrink datafile if needed */ /* LY: shrink datafile if needed */
if (unlikely(shrink)) { if (unlikely(shrink)) {
@ -9951,7 +9929,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
mdbx_error("initial-%s for lck-file failed", "msync"); mdbx_error("initial-%s for lck-file failed", "msync");
goto bailout; goto bailout;
} }
err = mdbx_filesync(env->me_lck_mmap.fd, MDBX_SYNC_SIZE); err = mdbx_fsync(env->me_lck_mmap.fd, MDBX_SYNC_SIZE);
if (unlikely(err != MDBX_SUCCESS)) { if (unlikely(err != MDBX_SUCCESS)) {
mdbx_error("initial-%s for lck-file failed", "fsync"); mdbx_error("initial-%s for lck-file failed", "fsync");
goto bailout; goto bailout;
@ -10112,13 +10090,24 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) {
: MDBX_RESULT_TRUE; : MDBX_RESULT_TRUE;
} }
/* Merge flags and avoid false MDBX_UTTERLY_NOSYNC */ /* Merge sync flags */
static uint32_t merge_flags(const uint32_t a, const uint32_t b) { static uint32_t merge_sync_flags(const uint32_t a, const uint32_t b) {
uint32_t r = a | b; uint32_t r = a | b;
/* avoid false MDBX_UTTERLY_NOSYNC */
if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) &&
!F_ISSET(b, MDBX_UTTERLY_NOSYNC)) !F_ISSET(b, MDBX_UTTERLY_NOSYNC))
r -= (r & MDBX_WRITEMAP) ? MDBX_UTTERLY_NOSYNC ^ MDBX_MAPASYNC r = (r - MDBX_UTTERLY_NOSYNC) | MDBX_SAFE_NOSYNC;
: MDBX_UTTERLY_NOSYNC ^ MDBX_SAFE_NOSYNC;
/* convert MDBX_DEPRECATED_MAPASYNC to MDBX_SAFE_NOSYNC */
if ((r & (MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC)) ==
(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC))
r = (r - MDBX_DEPRECATED_MAPASYNC) | MDBX_SAFE_NOSYNC;
/* force MDBX_NOMETASYNC if MDBX_SAFE_NOSYNC enabled */
if (r & MDBX_SAFE_NOSYNC)
r |= MDBX_NOMETASYNC;
assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) &&
!F_ISSET(a, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) &&
!F_ISSET(b, MDBX_UTTERLY_NOSYNC))); !F_ISSET(b, MDBX_UTTERLY_NOSYNC)));
@ -10152,7 +10141,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
/* pickup previously mdbx_env_set_flags(), /* pickup previously mdbx_env_set_flags(),
* but avoid MDBX_UTTERLY_NOSYNC by disjunction */ * but avoid MDBX_UTTERLY_NOSYNC by disjunction */
flags = merge_flags(flags, env->me_flags); flags = merge_sync_flags(flags, env->me_flags);
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
const DWORD dwAttrib = GetFileAttributesW(pathnameW); const DWORD dwAttrib = GetFileAttributesW(pathnameW);
@ -10231,9 +10220,9 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
if (flags & MDBX_RDONLY) { if (flags & MDBX_RDONLY) {
/* LY: silently ignore irrelevant flags when /* LY: silently ignore irrelevant flags when
* we're only getting read access */ * we're only getting read access */
flags &= flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC |
~(MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_NOMETASYNC | MDBX_COALESCE | MDBX_LIFORECLAIM |
MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE); MDBX_NOMEMINIT | MDBX_ACCEDE);
} else { } else {
#if MDBX_MMAP_INCOHERENT_FILE_WRITE #if MDBX_MMAP_INCOHERENT_FILE_WRITE
/* Temporary `workaround` for OpenBSD kernel's flaw. /* Temporary `workaround` for OpenBSD kernel's flaw.
@ -10315,7 +10304,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
goto bailout; goto bailout;
} }
const unsigned rigorous_flags = MDBX_SAFE_NOSYNC | MDBX_MAPASYNC; const unsigned rigorous_flags = MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC;
const unsigned mode_flags = rigorous_flags | MDBX_NOMETASYNC | const unsigned mode_flags = rigorous_flags | MDBX_NOMETASYNC |
MDBX_LIFORECLAIM | MDBX_COALESCE | MDBX_NORDAHEAD; MDBX_LIFORECLAIM | MDBX_COALESCE | MDBX_NORDAHEAD;
@ -16073,14 +16062,14 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
if (!dest_is_pipe) { if (!dest_is_pipe) {
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
rc = mdbx_filesync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); rc = mdbx_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE);
/* Write actual meta */ /* Write actual meta */
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
rc = mdbx_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); rc = mdbx_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0);
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
rc = mdbx_filesync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); rc = mdbx_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
} }
mdbx_memalign_free(buffer); mdbx_memalign_free(buffer);
@ -16141,7 +16130,7 @@ int __cold mdbx_env_set_flags(MDBX_env *env, unsigned flags, int onoff) {
return rc; return rc;
if (onoff) if (onoff)
env->me_flags = merge_flags(env->me_flags, flags); env->me_flags = merge_sync_flags(env->me_flags, flags);
else else
env->me_flags &= ~flags; env->me_flags &= ~flags;

View File

@ -908,6 +908,8 @@ struct MDBX_env {
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000) #define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
/* me_txkey is set */ /* me_txkey is set */
#define MDBX_ENV_TXKEY UINT32_C(0x10000000) #define MDBX_ENV_TXKEY UINT32_C(0x10000000)
/* Legacy MDBX_MAPASYNC (prior v0.9) */
#define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000)
#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
uint32_t me_flags; uint32_t me_flags;
mdbx_mmap_t me_dxb_mmap; /* The main data file */ mdbx_mmap_t me_dxb_mmap; /* The main data file */
@ -1390,8 +1392,8 @@ ceil_powerof2(size_t value, size_t granularity) {
* at runtime. Changing other flags requires closing the * at runtime. Changing other flags requires closing the
* environment and re-opening it with the new flags. */ * environment and re-opening it with the new flags. */
#define ENV_CHANGEABLE_FLAGS \ #define ENV_CHANGEABLE_FLAGS \
(MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC | MDBX_NOMEMINIT | \ (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC | \
MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE) MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE)
#define ENV_CHANGELESS_FLAGS \ #define ENV_CHANGELESS_FLAGS \
(MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)

View File

@ -783,13 +783,12 @@ int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
#endif #endif
} }
MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd, MDBX_INTERNAL_FUNC int mdbx_fsync(mdbx_filehandle_t fd,
enum mdbx_syncmode_bits mode_bits) { enum mdbx_syncmode_bits mode_bits) {
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
return ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) == 0 || if ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) && !FlushFileBuffers(fd))
FlushFileBuffers(fd)) return GetLastError();
? MDBX_SUCCESS return MDBX_SUCCESS;
: GetLastError();
#else #else
#if defined(__APPLE__) && \ #if defined(__APPLE__) && \
@ -797,30 +796,37 @@ MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd,
if (mode_bits & MDBX_SYNC_IODQ) if (mode_bits & MDBX_SYNC_IODQ)
return likely(fcntl(fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno; return likely(fcntl(fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno;
#endif /* MacOS */ #endif /* MacOS */
#if defined(__linux__) || defined(__gnu_linux__)
if (mode_bits == MDBX_SYNC_SIZE && mdbx_linux_kernel_version >= 0x03060000) /* LY: This approach is always safe and without appreciable performance
return MDBX_SUCCESS; * degradation, even on a kernel with fdatasync's bug.
#endif /* Linux */ *
int rc; * For more info about of a corresponding fdatasync() bug
do { * see http://www.spinics.net/lists/linux-ext4/msg33714.html */
while (1) {
switch (mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_SIZE)) {
case MDBX_SYNC_NONE:
return MDBX_SUCCESS /* nothing to do */;
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0 #if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
/* LY: This code is always safe and without appreciable performance case MDBX_SYNC_DATA:
* degradation, even on a kernel with fdatasync's bug.
*
* For more info about of a corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
if ((mode_bits & MDBX_SYNC_SIZE) == 0) {
if (fdatasync(fd) == 0) if (fdatasync(fd) == 0)
return MDBX_SUCCESS; return MDBX_SUCCESS;
} else break /* error */;
#else #if defined(__linux__) || defined(__gnu_linux__)
(void)mode_bits; case MDBX_SYNC_SIZE:
#endif if (mdbx_linux_kernel_version >= 0x03060000)
if (fsync(fd) == 0) return MDBX_SUCCESS;
return MDBX_SUCCESS; __fallthrough /* fall through */;
rc = errno; #endif /* Linux */
} while (rc == EINTR); #endif /* _POSIX_SYNCHRONIZED_IO > 0 */
return rc; default:
if (fsync(fd) == 0)
return MDBX_SUCCESS;
}
int rc = errno;
if (rc != EINTR)
return rc;
}
#endif #endif
} }
@ -938,24 +944,24 @@ MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread) {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset, MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset,
size_t length, int async) { size_t length,
enum mdbx_syncmode_bits mode_bits) {
uint8_t *ptr = (uint8_t *)map->address + offset; uint8_t *ptr = (uint8_t *)map->address + offset;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if (FlushViewOfFile(ptr, length) && (async || FlushFileBuffers(map->fd))) if (!FlushViewOfFile(ptr, length))
return MDBX_SUCCESS; return GetLastError();
return GetLastError();
#else #else
#if defined(__linux__) || defined(__gnu_linux__) #if defined(__linux__) || defined(__gnu_linux__)
if (async && mdbx_linux_kernel_version > 0x02061300) if (mode_bits == MDBX_SYNC_NONE && mdbx_linux_kernel_version > 0x02061300)
/* Since Linux 2.6.19, MS_ASYNC is in fact a no-op, /* Since Linux 2.6.19, MS_ASYNC is in fact a no-op. The kernel properly
since the kernel properly tracks dirty pages and flushes them to storage * tracks dirty pages and flushes them to storage as necessary. */
as necessary. */
return MDBX_SUCCESS; return MDBX_SUCCESS;
#endif /* Linux */ #endif /* Linux */
const int mode = async ? MS_ASYNC : MS_SYNC; if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
int rc = (msync(ptr, length, mode) == 0) ? MDBX_SUCCESS : errno; return errno;
return rc; mode_bits &= ~MDBX_SYNC_DATA;
#endif #endif
return mdbx_fsync(map->fd, mode_bits);
} }
MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle, MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle,

View File

@ -572,13 +572,14 @@ mdbx_thread_create(mdbx_thread_t *thread,
MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread); MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread);
enum mdbx_syncmode_bits { enum mdbx_syncmode_bits {
MDBX_SYNC_NONE = 0,
MDBX_SYNC_DATA = 1, MDBX_SYNC_DATA = 1,
MDBX_SYNC_SIZE = 2, MDBX_SYNC_SIZE = 2,
MDBX_SYNC_IODQ = 4 MDBX_SYNC_IODQ = 4
}; };
MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd, MDBX_INTERNAL_FUNC int mdbx_fsync(mdbx_filehandle_t fd,
const enum mdbx_syncmode_bits mode_bits); const enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length); MDBX_INTERNAL_FUNC int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
MDBX_INTERNAL_FUNC int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos); MDBX_INTERNAL_FUNC int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos);
MDBX_INTERNAL_FUNC int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length); MDBX_INTERNAL_FUNC int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length);
@ -635,7 +636,8 @@ MDBX_INTERNAL_FUNC int
mdbx_resume_threads_after_remap(mdbx_handle_array_t *array); mdbx_resume_threads_after_remap(mdbx_handle_array_t *array);
#endif /* Windows */ #endif /* Windows */
MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset, MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset,
size_t length, int async); size_t length,
enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle, MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle,
const char *pathname, int err); const char *pathname, int err);

View File

@ -298,7 +298,6 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option,
const struct option_verb mode_bits[] = { const struct option_verb mode_bits[] = {
{"rdonly", unsigned(MDBX_RDONLY)}, {"rdonly", unsigned(MDBX_RDONLY)},
{"mapasync", unsigned(MDBX_MAPASYNC)},
{"nosync-utterly", unsigned(MDBX_UTTERLY_NOSYNC)}, {"nosync-utterly", unsigned(MDBX_UTTERLY_NOSYNC)},
{"nosubdir", unsigned(MDBX_NOSUBDIR)}, {"nosubdir", unsigned(MDBX_NOSUBDIR)},
{"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)}, {"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)},

View File

@ -95,7 +95,6 @@ void __noreturn usage(void) {
" coalesce == MDBX_COALESCE\n" " coalesce == MDBX_COALESCE\n"
" nosync-safe == MDBX_SAFE_NOSYNC\n" " nosync-safe == MDBX_SAFE_NOSYNC\n"
" writemap == MDBX_WRITEMAP\n" " writemap == MDBX_WRITEMAP\n"
" mapasync == MDBX_MAPASYNC\n"
" nosync-utterly == MDBX_UTTERLY_NOSYNC\n" " nosync-utterly == MDBX_UTTERLY_NOSYNC\n"
" perturb == MDBX_PAGEPERTURB\n" " perturb == MDBX_PAGEPERTURB\n"
" notls == MDBX_NOTLS\n" " notls == MDBX_NOTLS\n"
@ -125,8 +124,8 @@ void actor_params::set_defaults(const std::string &tmpdir) {
#endif #endif
pathname_db = tmpdir + "mdbx-test.db"; pathname_db = tmpdir + "mdbx-test.db";
mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOMEMINIT | mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_SAFE_NOSYNC |
MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_ACCEDE; MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_ACCEDE;
table_flags = MDBX_DUPSORT; table_flags = MDBX_DUPSORT;
size_lower = -1; size_lower = -1;

View File

@ -77,8 +77,7 @@ bool testcase_nested::teardown() {
void testcase_nested::push_txn() { void testcase_nested::push_txn() {
MDBX_txn *txn; MDBX_txn *txn;
unsigned flags = unsigned flags = prng32() & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC);
prng32() & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC);
int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn); int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
failure_perror("mdbx_txn_begin(nested)", err); failure_perror("mdbx_txn_begin(nested)", err);

View File

@ -109,8 +109,8 @@ static void db_connect() {
env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1)); env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1));
MDBX_CHECK(mdbx_env_set_maxdbs(env, 30)); MDBX_CHECK(mdbx_env_set_maxdbs(env, 30));
MDBX_CHECK(mdbx_env_open(env, opt_db_path, MDBX_CHECK(mdbx_env_open(env, opt_db_path,
MDBX_CREATE | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_CREATE | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC |
MDBX_SAFE_NOSYNC | MDBX_LIFORECLAIM, MDBX_LIFORECLAIM,
0664)); 0664));
MDBX_txn *txn; MDBX_txn *txn;