mdbx: drop/deprecate MDBX_MAPASYNC.

Change-Id: I472f97f568a32325eb056c8ee4d2f2350a473bda
This commit is contained in:
Leonid Yuriev 2020-08-01 19:13:17 +03:00
parent 135bead730
commit 5e43ee61a2
11 changed files with 182 additions and 214 deletions

View File

@ -155,7 +155,7 @@ check: test dist
test: build-test test: build-test
rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \
(./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=$(TEST_ITER) --pathname=$(TEST_DB) --dont-cleanup-after basic && \ (./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=$(TEST_ITER) --pathname=$(TEST_DB) --dont-cleanup-after basic && \
./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=12 --pathname=$(TEST_DB) --dont-cleanup-after basic) \ ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=12 --pathname=$(TEST_DB) --dont-cleanup-after basic) \
| tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \
&& ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy && ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy
@ -163,7 +163,7 @@ test-singleprocess: all mdbx_test
rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \
(./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \ (./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \
./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ ./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \
./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --nested) \ ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --nested) \
| tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \
&& ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy && ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy
@ -178,7 +178,7 @@ memcheck test-valgrind:
rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ( \ rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ( \
$(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after basic && \
$(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ $(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \
$(VALGRIND) ./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ $(VALGRIND) ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after basic && \
$(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \ $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \
$(VALGRIND) ./mdbx_chk -vvn $(TEST_DB)-copy \ $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB)-copy \
) | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) ) | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42)

View File

@ -259,9 +259,13 @@ pair, to the first, to the last, or not set to anything.
## Other fixes and specifics ## Other fixes and specifics
1. Fixed more than 10 significant errors, in particular: page leaks, wrong sub-database statistics, segfault in several conditions, nonoptimal page merge strategy, updating an existing record with a change in data size (including for multimap), etc. 1. Fixed more than 10 significant errors, in particular: page leaks,
wrong sub-database statistics, segfault in several conditions,
nonoptimal page merge strategy, updating an existing record with
a change in data size (including for multimap), etc.
2. All cursors can be reused and should be closed explicitly, regardless ones were opened within a write or read transaction. 2. All cursors can be reused and should be closed explicitly,
regardless of whether they were opened within a write or read transaction.
3. Opening database handles are spared from race conditions and 3. Opening database handles are spared from race conditions and
pre-opening is not needed. pre-opening is not needed.
@ -269,10 +273,9 @@ pre-opening is not needed.
4. Returning `MDBX_EMULTIVAL` error in case of ambiguous update or delete. 4. Returning `MDBX_EMULTIVAL` error in case of ambiguous update or delete.
5. Guarantee of database integrity even in asynchronous unordered write-to-disk mode. 5. Guarantee of database integrity even in asynchronous unordered write-to-disk mode.
> _libmdbx_ propose additional trade-off by implementing append-like manner for updates > _libmdbx_ propose additional trade-off by `MDBX_SAFE_NOSYNC` with append-like manner for updates,
> in `MDBX_SAFE_NOSYNC` and `MDBX_WRITEMAP|MDBX_MAPASYNC` modes, that avoid database corruption after a system crash > that avoids database corruption after a system crash contrary to LMDB.
> contrary to LMDB. Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode is available to match LMDB behaviour, > Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode is available to match behaviour of the `MDB_NOSYNC` in LMDB.
> and for special use-cases.
6. On **MacOS & iOS** the `fcntl(F_FULLFSYNC)` syscall is used _by 6. On **MacOS & iOS** the `fcntl(F_FULLFSYNC)` syscall is used _by
default_ to synchronize data with the disk, as this is [the only way to default_ to synchronize data with the disk, as this is [the only way to

111
mdbx.h
View File

@ -809,13 +809,12 @@ enum MDBX_env_flags_t {
* series of write transactions, will be as small as possible. Thus creates * series of write transactions, will be as small as possible. Thus creates
* ideal conditions for the efficient operation of the disk write-back cache. * ideal conditions for the efficient operation of the disk write-back cache.
* *
* \ref MDBX_LIFORECLAIM is compatible with all no-sync flags (i.e. * \ref MDBX_LIFORECLAIM is compatible with all no-sync flags, but gives NO
* \ref MDBX_NOMETASYNC, \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC, \ref * noticeable impact in combination with \ref MDBX_SAFE_NOSYNC or
* MDBX_MAPASYNC), but gives no noticeable impact in combination with \ref * \ref MDBX_UTTERLY_NOSYNC. Because MDBX will reused pages only before the
* MDBX_SAFE_NOSYNC. Because MDBX will reused pages only before the last * last "steady" MVCC-snapshot, i.e. the loop length of database pages
* "steady" MVCC-snapshot, i.e. the loop length of database pages circulation * circulation will be mostly defined by frequency of calling
* will be mostly defined by frequency of calling `mdbx_env_sync()` rather * `mdbx_env_sync()` rather than LIFO and FIFO difference.
* than LIFO and FIFO difference.
* *
* This flag may be changed at any time using mdbx_env_set_flags(). */ * This flag may be changed at any time using mdbx_env_set_flags(). */
MDBX_LIFORECLAIM = UINT32_C(0x4000000), MDBX_LIFORECLAIM = UINT32_C(0x4000000),
@ -827,9 +826,9 @@ enum MDBX_env_flags_t {
/** \defgroup sync_modes SYNC MODES /** \defgroup sync_modes SYNC MODES
* *
* \attention Using any combination of \ref MDBX_SAFE_NOSYNC, \ref * \attention Using any combination of \ref MDBX_SAFE_NOSYNC, \ref
* MDBX_NOMETASYNC, \ref MDBX_MAPASYNC and especially \ref MDBX_UTTERLY_NOSYNC * MDBX_NOMETASYNC and especially \ref MDBX_UTTERLY_NOSYNC is always a deal to
* is always a deal to reduce durability for gain write performance. You must * reduce durability for gain write performance. You must know exactly what
* know exactly what you are doing and what risks you are taking! * you are doing and what risks you are taking!
* *
* \note for LMDB users: \ref MDBX_SAFE_NOSYNC is NOT similar to LMDB_NOSYNC, * \note for LMDB users: \ref MDBX_SAFE_NOSYNC is NOT similar to LMDB_NOSYNC,
* but \ref MDBX_UTTERLY_NOSYNC is exactly match LMDB_NOSYNC. See details * but \ref MDBX_UTTERLY_NOSYNC is exactly match LMDB_NOSYNC. See details
@ -863,7 +862,6 @@ enum MDBX_env_flags_t {
* *
* \see MDBX_NOMETASYNC * \see MDBX_NOMETASYNC
* \see MDBX_SAFE_NOSYNC * \see MDBX_SAFE_NOSYNC
* \see MDBX_MAPASYNC
* \see MDBX_UTTERLY_NOSYNC * \see MDBX_UTTERLY_NOSYNC
* *
* @{ */ * @{ */
@ -893,13 +891,14 @@ enum MDBX_env_flags_t {
* huge difference in how are recycled the MVCC snapshots corresponding to * huge difference in how are recycled the MVCC snapshots corresponding to
* previous "steady" transactions (see below). * previous "steady" transactions (see below).
* *
* With \ref MDBX_WRITEMAP the `MDBX_SAFE_NOSYNC` instructs MDBX to use
* asynchronous mmap-flushes to disk. Asynchronous mmap-flushes mean that
* all writes will be scheduled and performed by the operating system in its
* own manner, i.e. unordered. MDBX itself just notifies the operating system
* that it would be nice to write data to disk, but no more.
*
* Depending on the platform and hardware, with `MDBX_SAFE_NOSYNC` you may get * Depending on the platform and hardware, with `MDBX_SAFE_NOSYNC` you may get
* a multiple increase of write performance, even 10 times or more. \note Note * a multiple increase of write performance, even 10 times or more.
* that (`MDBX_SAFE_NOSYNC` | \ref MDBX_WRITEMAP) leaves the system with no
* hint for when to write transactions to disk. Therefore the
* (\ref MDBX_MAPASYNC | \ref MDBX_WRITEMAP) may be preferable, but without
* `MDBX_SAFE_NOSYNC` because the (\ref MDBX_MAPASYNC | `MDBX_SAFE_NOSYNC`)
* actually gives \ref MDBX_UTTERLY_NOSYNC.
* *
* In contrast to \ref MDBX_UTTERLY_NOSYNC mode, with `MDBX_SAFE_NOSYNC` flag * In contrast to \ref MDBX_UTTERLY_NOSYNC mode, with `MDBX_SAFE_NOSYNC` flag
* MDBX will keeps untouched pages within B-tree of the last transaction * MDBX will keeps untouched pages within B-tree of the last transaction
@ -933,43 +932,15 @@ enum MDBX_env_flags_t {
* *
* `MDBX_SAFE_NOSYNC` flag may be changed at any time using * `MDBX_SAFE_NOSYNC` flag may be changed at any time using
* \ref mdbx_env_set_flags() or by passing to \ref mdbx_txn_begin() for * \ref mdbx_env_set_flags() or by passing to \ref mdbx_txn_begin() for
* particular write transaction. * particular write transaction. */
*
* \warning don't combine this flag with \ref MDBX_MAPASYNC since you will got
* \ref MDBX_UTTERLY_NOSYNC in that way. \see sync_modes */
MDBX_SAFE_NOSYNC = UINT32_C(0x10000), MDBX_SAFE_NOSYNC = UINT32_C(0x10000),
/** Use asynchronous msync when \ref MDBX_WRITEMAP is used. /** \deprecated Please use \ref MDBX_SAFE_NOSYNC instead of `MDBX_MAPASYNC`.
* *
* `MDBX_MAPASYNC` meaningful and give effect only in conjunction * Since version 0.9.x the `MDBX_MAPASYNC` is deprecated and has the same
* with `MDBX_WRITEMAP` or `MDBX_SAFE_NOSYNC`: * effect as \ref MDBX_SAFE_NOSYNC with \ref MDBX_WRITEMAP. This just API
* - with \ref MDBX_SAFE_NOSYNC actually gives \ref MDBX_UTTERLY_NOSYNC, * simplification is for convenience and clarity. */
* which wipe previous steady commits for reuse pages as described above. MDBX_MAPASYNC = MDBX_SAFE_NOSYNC,
* - with \ref MDBX_WRITEMAP but without \ref MDBX_SAFE_NOSYNC instructs MDBX
* to use asynchronous mmap-flushes to disk as described below.
* - with both \ref MDBX_WRITEMAP and \ref MDBX_SAFE_NOSYNC you get the both
* effects.
*
* Asynchronous mmap-flushes means that actually all writes will scheduled and
* performed by operation system on it own manner, i.e. unordered. MDBX itself
* just notify operating system that it would be nice to write data to disk,
* but no more.
*
* With \ref MDBX_MAPASYNC flag, but without \ref MDBX_UTTERLY_NOSYNC (i.e.
* without OR'ing with \ref MDBX_SAFE_NOSYNC) MDBX will keeps untouched pages
* within B-tree of the last transaction "steady" which was synced to disk
* completely. So, this makes exactly the same "long-lived" impact and the
* same consequences as described above for \ref MDBX_SAFE_NOSYNC flag.
*
* Depending on the platform and hardware, with combination of
* \ref MDBX_WRITEMAP and \ref MDBX_MAPASYNC you may get a multiple increase
* of write performance, even 10-100 times or more. \ref MDBX_MAPASYNC flag
* may be changed at any time using \ref mdbx_env_set_flags() or by passing to
* \ref mdbx_txn_begin() for particular write transaction.
*
* \warning don't combine this flag with \ref MDBX_SAFE_NOSYNC since you will
* got \ref MDBX_UTTERLY_NOSYNC in that way. \see sync_modes */
MDBX_MAPASYNC = UINT32_C(0x100000),
/** Don't sync anything and wipe previous steady commits. /** Don't sync anything and wipe previous steady commits.
* *
@ -1012,7 +983,7 @@ enum MDBX_env_flags_t {
* `MDBX_UTTERLY_NOSYNC` flag may be changed at any time using * `MDBX_UTTERLY_NOSYNC` flag may be changed at any time using
* \ref mdbx_env_set_flags(), but don't has effect if passed to * \ref mdbx_env_set_flags(), but don't has effect if passed to
* \ref mdbx_txn_begin() for particular write transaction. \see sync_modes */ * \ref mdbx_txn_begin() for particular write transaction. \see sync_modes */
MDBX_UTTERLY_NOSYNC = MDBX_SAFE_NOSYNC | MDBX_MAPASYNC, MDBX_UTTERLY_NOSYNC = MDBX_SAFE_NOSYNC | UINT32_C(0x100000),
/** @} end of SYNC MODES */ /** @} end of SYNC MODES */
@ -1459,14 +1430,14 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* \ref MDBX_NOMEMINIT, \ref MDBX_COALESCE, \ref MDBX_LIFORECLAIM. * \ref MDBX_NOMEMINIT, \ref MDBX_COALESCE, \ref MDBX_LIFORECLAIM.
* See \ref env_flags section. * See \ref env_flags section.
* *
* - \ref MDBX_NOMETASYNC, \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC, * - \ref MDBX_NOMETASYNC, \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC.
* \ref MDBX_MAPASYNC. See \ref sync_modes section. * See \ref sync_modes section.
* *
* \note `MDB_NOLOCK` flag don't supported by MDBX, * \note `MDB_NOLOCK` flag don't supported by MDBX,
* try use \ref MDBX_EXCLUSIVE as a replacement. * try use \ref MDBX_EXCLUSIVE as a replacement.
* *
* \note MDBX don't allow to mix processes with different \ref MDBX_SAFE_NOSYNC, * \note MDBX don't allow to mix processes with different \ref MDBX_SAFE_NOSYNC
* \ref MDBX_MAPASYNC flags on the same environment. * flags on the same environment.
* In such case \ref MDBX_INCOMPATIBLE will be returned. * In such case \ref MDBX_INCOMPATIBLE will be returned.
* *
* If the database is already exist and parameters specified early by * If the database is already exist and parameters specified early by
@ -1493,7 +1464,7 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* more than once. * more than once.
* \retval MDBX_INCOMPATIBLE Environment is already opened by another process, * \retval MDBX_INCOMPATIBLE Environment is already opened by another process,
* but with different set of \ref MDBX_SAFE_NOSYNC, * but with different set of \ref MDBX_SAFE_NOSYNC,
* \ref MDBX_MAPASYNC flags. * \ref MDBX_UTTERLY_NOSYNC flags.
* Or if the database is already exist and parameters * Or if the database is already exist and parameters
* specified early by \ref mdbx_env_set_geometry() * specified early by \ref mdbx_env_set_geometry()
* are incompatible (i.e. different pagesize, etc). * are incompatible (i.e. different pagesize, etc).
@ -1697,7 +1668,7 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_env_info(MDBX_env *env, MDBX_envinfo *info,
* \ingroup c_extra * \ingroup c_extra
* *
* Unless the environment was opened with no-sync flags (\ref MDBX_NOMETASYNC, * Unless the environment was opened with no-sync flags (\ref MDBX_NOMETASYNC,
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC and \ref MDBX_MAPASYNC), then * \ref MDBX_SAFE_NOSYNC and \ref MDBX_UTTERLY_NOSYNC), then
* data is always written and flushed to disk when \ref mdbx_txn_commit() is * data is always written and flushed to disk when \ref mdbx_txn_commit() is
* called. Otherwise \ref mdbx_env_sync() may be called to manually write and * called. Otherwise \ref mdbx_env_sync() may be called to manually write and
* flush unsynced data to disk. * flush unsynced data to disk.
@ -1741,8 +1712,7 @@ LIBMDBX_API int mdbx_env_sync(MDBX_env *env);
LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env); LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
/** Sets threshold to force flush the data buffers to disk, even any of /** Sets threshold to force flush the data buffers to disk, even any of
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC and \ref MDBX_MAPASYNC flags in * \ref MDBX_SAFE_NOSYNC flag in the environment.
* the environment.
* \ingroup c_settings * \ingroup c_settings
* *
* The threshold value affects all processes which operates with given * The threshold value affects all processes which operates with given
@ -1752,7 +1722,8 @@ LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
* Data is always written to disk when \ref mdbx_txn_commit() is called, but * Data is always written to disk when \ref mdbx_txn_commit() is called, but
* the operating system may keep it buffered. MDBX always flushes the OS buffers * the operating system may keep it buffered. MDBX always flushes the OS buffers
* upon commit as well, unless the environment was opened with * upon commit as well, unless the environment was opened with
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_MAPASYNC or in part \ref MDBX_NOMETASYNC. * \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC
* or in part \ref MDBX_NOMETASYNC.
* *
* The default is 0, than mean no any threshold checked, and no additional * The default is 0, than mean no any threshold checked, and no additional
* flush will be made. * flush will be made.
@ -1765,8 +1736,7 @@ LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold); LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold);
/** Sets relative period since the last unsteady commit to force flush the data /** Sets relative period since the last unsteady commit to force flush the data
* buffers to disk, even any of \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC and * buffers to disk, even of \ref MDBX_SAFE_NOSYNC flag in the environment.
* \ref MDBX_MAPASYNC flags in the environment.
* \ingroup c_settings * \ingroup c_settings
* *
* The relative period value affects all processes which operates with given * The relative period value affects all processes which operates with given
@ -1776,7 +1746,7 @@ LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold);
* Data is always written to disk when \ref mdbx_txn_commit() is called, but the * Data is always written to disk when \ref mdbx_txn_commit() is called, but the
* operating system may keep it buffered. MDBX always flushes the OS buffers * operating system may keep it buffered. MDBX always flushes the OS buffers
* upon commit as well, unless the environment was opened with * upon commit as well, unless the environment was opened with
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_MAPASYNC or in part \ref MDBX_NOMETASYNC. * \ref MDBX_SAFE_NOSYNC or in part \ref MDBX_NOMETASYNC.
* *
* Settled period don't checked asynchronously, but only by the * Settled period don't checked asynchronously, but only by the
* \ref mdbx_txn_commit() and \ref mdbx_env_sync() functions. Therefore, in * \ref mdbx_txn_commit() and \ref mdbx_env_sync() functions. Therefore, in
@ -1810,11 +1780,11 @@ LIBMDBX_API int mdbx_env_set_syncperiod(MDBX_env *env,
* *
* \param [in] dont_sync A dont'sync flag, if non-zero the last checkpoint * \param [in] dont_sync A dont'sync flag, if non-zero the last checkpoint
* will be kept "as is" and may be still "weak" in the * will be kept "as is" and may be still "weak" in the
* \ref MDBX_UTTERLY_NOSYNC or \ref MDBX_MAPASYNC modes. * \ref MDBX_SAFE_NOSYNC or \ref MDBX_UTTERLY_NOSYNC
* Such "weak" checkpoint will be ignored on opening next * modes. Such "weak" checkpoint will be ignored on
* time, and transactions since the last non-weak * opening next time, and transactions since the last
* checkpoint (meta-page update) will rolledback for * non-weak checkpoint (meta-page update) will rolledback
* consistency guarantee. * for consistency guarantee.
* *
* \returns A non-zero error value on failure and 0 on success, * \returns A non-zero error value on failure and 0 on success,
* some possible errors are: * some possible errors are:
@ -2294,8 +2264,7 @@ LIBMDBX_API void *mdbx_env_get_userctx(const MDBX_env *env);
* - \ref MDBX_TRYTXN Do not block when starting * - \ref MDBX_TRYTXN Do not block when starting
* a write transaction. * a write transaction.
* *
* - \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC or * - \ref MDBX_SAFE_NOSYNC, \ref MDBX_NOMETASYNC.
* \ref MDBX_MAPASYNC.
* Do not sync data to disk corresponding * Do not sync data to disk corresponding
* to \ref MDBX_NOMETASYNC or \ref MDBX_SAFE_NOSYNC * to \ref MDBX_NOMETASYNC or \ref MDBX_SAFE_NOSYNC
* description. \see sync_modes. * description. \see sync_modes.

View File

@ -4922,7 +4922,7 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER)) SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER))
err = errno; err = errno;
#else #else
err = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA); err = mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA);
#endif #endif
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
@ -5646,7 +5646,7 @@ __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
if (outside_txn) { if (outside_txn) {
if (unsynced_pages > /* FIXME: define threshold */ 16 && if (unsynced_pages > /* FIXME: define threshold */ 16 &&
(flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0) { (flags & MDBX_SAFE_NOSYNC) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
const size_t usedbytes = pgno_align2os_bytes(env, head->mm_geo.next); const size_t usedbytes = pgno_align2os_bytes(env, head->mm_geo.next);
@ -5655,7 +5655,7 @@ __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
/* LY: pre-sync without holding lock to reduce latency for writer(s) */ /* LY: pre-sync without holding lock to reduce latency for writer(s) */
int err = (flags & MDBX_WRITEMAP) int err = (flags & MDBX_WRITEMAP)
? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes, false) ? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes, false)
: mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA); : mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
@ -5673,7 +5673,7 @@ __cold static int mdbx_env_sync_internal(MDBX_env *env, int force,
} }
if (!META_IS_STEADY(head) || if (!META_IS_STEADY(head) ||
((flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) { ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) {
mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO, mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO,
data_page(head)->mp_pgno, mdbx_durable_str(head), data_page(head)->mp_pgno, mdbx_durable_str(head),
unsynced_pages); unsynced_pages);
@ -5694,11 +5694,10 @@ fastpath:
if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_NOMETASYNC) != 0) { if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_NOMETASYNC) != 0) {
const txnid_t head_txnid = mdbx_recent_committed_txnid(env); const txnid_t head_txnid = mdbx_recent_committed_txnid(env);
if (*env->me_meta_sync_txnid != (uint32_t)head_txnid) { if (*env->me_meta_sync_txnid != (uint32_t)head_txnid) {
rc = rc = (flags & MDBX_WRITEMAP)
(flags & MDBX_WRITEMAP)
? mdbx_msync(&env->me_dxb_mmap, 0, ? mdbx_msync(&env->me_dxb_mmap, 0,
pgno_align2os_bytes(env, NUM_METAS), false) pgno_align2os_bytes(env, NUM_METAS), false)
: mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); : mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
*env->me_meta_sync_txnid = (uint32_t)head_txnid; *env->me_meta_sync_txnid = (uint32_t)head_txnid;
} }
@ -6480,7 +6479,7 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
mdbx_assert(env, mdbx_assert(env,
(txn->mt_flags & ~(MDBX_NOTLS | MDBX_RDONLY | MDBX_WRITEMAP | (txn->mt_flags & ~(MDBX_NOTLS | MDBX_RDONLY | MDBX_WRITEMAP |
MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0); MDBX_SAFE_NOSYNC)) == 0);
txn->mt_signature = MDBX_MT_SIGNATURE; txn->mt_signature = MDBX_MT_SIGNATURE;
*ret = txn; *ret = txn;
mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO
@ -8584,7 +8583,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0); mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0);
mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now); mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now);
if (flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) { if (flags & MDBX_SAFE_NOSYNC) {
/* Check auto-sync conditions */ /* Check auto-sync conditions */
const pgno_t autosync_threshold = *env->me_autosync_threshold; const pgno_t autosync_threshold = *env->me_autosync_threshold;
const uint64_t autosync_period = *env->me_autosync_period; const uint64_t autosync_period = *env->me_autosync_period;
@ -8681,38 +8680,27 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
} }
/* LY: step#1 - sync previously written/updated data-pages */ /* LY: step#1 - sync previously written/updated data-pages */
int rc = *env->me_unsynced_pages ? MDBX_RESULT_TRUE /* carry non-steady */ int rc = MDBX_RESULT_FALSE /* carry steady */;
: MDBX_RESULT_FALSE /* carry steady */; if (*env->me_unsynced_pages) {
if (rc != MDBX_RESULT_FALSE && (flags & MDBX_SAFE_NOSYNC) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
MDBX_meta *const recent_steady_meta = mdbx_meta_steady(env); enum mdbx_syncmode_bits mode_bits = MDBX_SYNC_NONE;
if (flags & MDBX_WRITEMAP) { if ((flags & MDBX_SAFE_NOSYNC) == 0) {
const size_t begin = mode_bits = MDBX_SYNC_DATA;
floor_powerof2(pgno2bytes(env, NUM_METAS), env->me_os_psize); if (pending->mm_geo.next > mdbx_meta_steady(env)->mm_geo.now)
const size_t end = pgno_align2os_bytes(env, pending->mm_geo.next); mode_bits |= MDBX_SYNC_SIZE;
if (end > begin) { if (flags & MDBX_NOMETASYNC)
rc = mdbx_msync(&env->me_dxb_mmap, begin, end - begin, mode_bits |= MDBX_SYNC_IODQ;
flags & MDBX_MAPASYNC); }
if (flags & MDBX_WRITEMAP)
rc =
mdbx_msync(&env->me_dxb_mmap, 0,
pgno_align2os_bytes(env, pending->mm_geo.next), mode_bits);
else
rc = mdbx_fsync(env->me_lazy_fd, mode_bits);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
} rc = (flags & MDBX_SAFE_NOSYNC) ? MDBX_RESULT_TRUE /* carry non-steady */
rc = MDBX_RESULT_TRUE /* carry non-steady */; : MDBX_RESULT_FALSE /* carry steady */;
if ((flags & MDBX_MAPASYNC) == 0) {
if (unlikely(pending->mm_geo.next > recent_steady_meta->mm_geo.now)) {
rc = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_SIZE);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
rc = MDBX_RESULT_FALSE /* carry steady */;
}
} else {
rc = mdbx_filesync(env->me_lazy_fd,
(pending->mm_geo.next > recent_steady_meta->mm_geo.now)
? MDBX_SYNC_DATA | MDBX_SYNC_SIZE
: MDBX_SYNC_DATA);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
} }
/* Steady or Weak */ /* Steady or Weak */
@ -8825,24 +8813,13 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
target->mm_datasync_sign = pending->mm_datasync_sign; target->mm_datasync_sign = pending->mm_datasync_sign;
mdbx_flush_incoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
if ((flags & MDBX_SAFE_NOSYNC) == 0) {
/* sync meta-pages */ /* sync meta-pages */
const bool weak = (flags & (MDBX_MAPASYNC | MDBX_NOMETASYNC)) != 0; rc =
rc = mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS), mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
weak); (flags & MDBX_NOMETASYNC) ? MDBX_SYNC_NONE
: MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
if (!weak) {
#if defined(__APPLE__) && \
MDBX_OSX_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY
rc = likely(fcntl(env->me_lazy_fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS
: errno;
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
#endif /* MacOS */
*env->me_meta_sync_txnid = pending->mm_txnid_a.low;
}
}
} else { } else {
const MDBX_meta undo_meta = *target; const MDBX_meta undo_meta = *target;
const mdbx_filehandle_t fd = (env->me_dsync_fd != INVALID_HANDLE_VALUE) const mdbx_filehandle_t fd = (env->me_dsync_fd != INVALID_HANDLE_VALUE)
@ -8860,16 +8837,17 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
goto fail; goto fail;
} }
mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize); mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize);
if ((flags & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC)) == 0) {
/* sync meta-pages */ /* sync meta-pages */
if (fd == env->me_lazy_fd) { if ((flags & MDBX_NOMETASYNC) == 0 && fd == env->me_lazy_fd) {
rc = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); rc = mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (rc != MDBX_SUCCESS) if (rc != MDBX_SUCCESS)
goto undo; goto undo;
} }
}
if (flags & MDBX_NOMETASYNC)
*env->me_unsynced_pages += 1;
else
*env->me_meta_sync_txnid = pending->mm_txnid_a.low; *env->me_meta_sync_txnid = pending->mm_txnid_a.low;
}
}
/* LY: shrink datafile if needed */ /* LY: shrink datafile if needed */
if (unlikely(shrink)) { if (unlikely(shrink)) {
@ -9951,7 +9929,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
mdbx_error("initial-%s for lck-file failed", "msync"); mdbx_error("initial-%s for lck-file failed", "msync");
goto bailout; goto bailout;
} }
err = mdbx_filesync(env->me_lck_mmap.fd, MDBX_SYNC_SIZE); err = mdbx_fsync(env->me_lck_mmap.fd, MDBX_SYNC_SIZE);
if (unlikely(err != MDBX_SUCCESS)) { if (unlikely(err != MDBX_SUCCESS)) {
mdbx_error("initial-%s for lck-file failed", "fsync"); mdbx_error("initial-%s for lck-file failed", "fsync");
goto bailout; goto bailout;
@ -10112,13 +10090,24 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) {
: MDBX_RESULT_TRUE; : MDBX_RESULT_TRUE;
} }
/* Merge flags and avoid false MDBX_UTTERLY_NOSYNC */ /* Merge sync flags */
static uint32_t merge_flags(const uint32_t a, const uint32_t b) { static uint32_t merge_sync_flags(const uint32_t a, const uint32_t b) {
uint32_t r = a | b; uint32_t r = a | b;
/* avoid false MDBX_UTTERLY_NOSYNC */
if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) &&
!F_ISSET(b, MDBX_UTTERLY_NOSYNC)) !F_ISSET(b, MDBX_UTTERLY_NOSYNC))
r -= (r & MDBX_WRITEMAP) ? MDBX_UTTERLY_NOSYNC ^ MDBX_MAPASYNC r = (r - MDBX_UTTERLY_NOSYNC) | MDBX_SAFE_NOSYNC;
: MDBX_UTTERLY_NOSYNC ^ MDBX_SAFE_NOSYNC;
/* convert MDBX_DEPRECATED_MAPASYNC to MDBX_SAFE_NOSYNC */
if ((r & (MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC)) ==
(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC))
r = (r - MDBX_DEPRECATED_MAPASYNC) | MDBX_SAFE_NOSYNC;
/* force MDBX_NOMETASYNC if MDBX_SAFE_NOSYNC enabled */
if (r & MDBX_SAFE_NOSYNC)
r |= MDBX_NOMETASYNC;
assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) &&
!F_ISSET(a, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) &&
!F_ISSET(b, MDBX_UTTERLY_NOSYNC))); !F_ISSET(b, MDBX_UTTERLY_NOSYNC)));
@ -10152,7 +10141,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
/* pickup previously mdbx_env_set_flags(), /* pickup previously mdbx_env_set_flags(),
* but avoid MDBX_UTTERLY_NOSYNC by disjunction */ * but avoid MDBX_UTTERLY_NOSYNC by disjunction */
flags = merge_flags(flags, env->me_flags); flags = merge_sync_flags(flags, env->me_flags);
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
const DWORD dwAttrib = GetFileAttributesW(pathnameW); const DWORD dwAttrib = GetFileAttributesW(pathnameW);
@ -10231,9 +10220,9 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
if (flags & MDBX_RDONLY) { if (flags & MDBX_RDONLY) {
/* LY: silently ignore irrelevant flags when /* LY: silently ignore irrelevant flags when
* we're only getting read access */ * we're only getting read access */
flags &= flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC |
~(MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_NOMETASYNC | MDBX_COALESCE | MDBX_LIFORECLAIM |
MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE); MDBX_NOMEMINIT | MDBX_ACCEDE);
} else { } else {
#if MDBX_MMAP_INCOHERENT_FILE_WRITE #if MDBX_MMAP_INCOHERENT_FILE_WRITE
/* Temporary `workaround` for OpenBSD kernel's flaw. /* Temporary `workaround` for OpenBSD kernel's flaw.
@ -10315,7 +10304,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
goto bailout; goto bailout;
} }
const unsigned rigorous_flags = MDBX_SAFE_NOSYNC | MDBX_MAPASYNC; const unsigned rigorous_flags = MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC;
const unsigned mode_flags = rigorous_flags | MDBX_NOMETASYNC | const unsigned mode_flags = rigorous_flags | MDBX_NOMETASYNC |
MDBX_LIFORECLAIM | MDBX_COALESCE | MDBX_NORDAHEAD; MDBX_LIFORECLAIM | MDBX_COALESCE | MDBX_NORDAHEAD;
@ -16073,14 +16062,14 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
if (!dest_is_pipe) { if (!dest_is_pipe) {
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
rc = mdbx_filesync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); rc = mdbx_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE);
/* Write actual meta */ /* Write actual meta */
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
rc = mdbx_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); rc = mdbx_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0);
if (likely(rc == MDBX_SUCCESS)) if (likely(rc == MDBX_SUCCESS))
rc = mdbx_filesync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); rc = mdbx_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
} }
mdbx_memalign_free(buffer); mdbx_memalign_free(buffer);
@ -16141,7 +16130,7 @@ int __cold mdbx_env_set_flags(MDBX_env *env, unsigned flags, int onoff) {
return rc; return rc;
if (onoff) if (onoff)
env->me_flags = merge_flags(env->me_flags, flags); env->me_flags = merge_sync_flags(env->me_flags, flags);
else else
env->me_flags &= ~flags; env->me_flags &= ~flags;

View File

@ -908,6 +908,8 @@ struct MDBX_env {
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000) #define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
/* me_txkey is set */ /* me_txkey is set */
#define MDBX_ENV_TXKEY UINT32_C(0x10000000) #define MDBX_ENV_TXKEY UINT32_C(0x10000000)
/* Legacy MDBX_MAPASYNC (prior v0.9) */
#define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000)
#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
uint32_t me_flags; uint32_t me_flags;
mdbx_mmap_t me_dxb_mmap; /* The main data file */ mdbx_mmap_t me_dxb_mmap; /* The main data file */
@ -1390,8 +1392,8 @@ ceil_powerof2(size_t value, size_t granularity) {
* at runtime. Changing other flags requires closing the * at runtime. Changing other flags requires closing the
* environment and re-opening it with the new flags. */ * environment and re-opening it with the new flags. */
#define ENV_CHANGEABLE_FLAGS \ #define ENV_CHANGEABLE_FLAGS \
(MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC | MDBX_NOMEMINIT | \ (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_DEPRECATED_MAPASYNC | \
MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE) MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE)
#define ENV_CHANGELESS_FLAGS \ #define ENV_CHANGELESS_FLAGS \
(MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) MDBX_LIFORECLAIM | MDBX_EXCLUSIVE)

View File

@ -783,13 +783,12 @@ int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
#endif #endif
} }
MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd, MDBX_INTERNAL_FUNC int mdbx_fsync(mdbx_filehandle_t fd,
enum mdbx_syncmode_bits mode_bits) { enum mdbx_syncmode_bits mode_bits) {
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
return ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) == 0 || if ((mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_IODQ)) && !FlushFileBuffers(fd))
FlushFileBuffers(fd)) return GetLastError();
? MDBX_SUCCESS return MDBX_SUCCESS;
: GetLastError();
#else #else
#if defined(__APPLE__) && \ #if defined(__APPLE__) && \
@ -797,30 +796,37 @@ MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd,
if (mode_bits & MDBX_SYNC_IODQ) if (mode_bits & MDBX_SYNC_IODQ)
return likely(fcntl(fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno; return likely(fcntl(fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno;
#endif /* MacOS */ #endif /* MacOS */
#if defined(__linux__) || defined(__gnu_linux__)
if (mode_bits == MDBX_SYNC_SIZE && mdbx_linux_kernel_version >= 0x03060000) /* LY: This approach is always safe and without appreciable performance
return MDBX_SUCCESS;
#endif /* Linux */
int rc;
do {
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
/* LY: This code is always safe and without appreciable performance
* degradation, even on a kernel with fdatasync's bug. * degradation, even on a kernel with fdatasync's bug.
* *
* For more info about of a corresponding fdatasync() bug * For more info about of a corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */ * see http://www.spinics.net/lists/linux-ext4/msg33714.html */
if ((mode_bits & MDBX_SYNC_SIZE) == 0) { while (1) {
switch (mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_SIZE)) {
case MDBX_SYNC_NONE:
return MDBX_SUCCESS /* nothing to do */;
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
case MDBX_SYNC_DATA:
if (fdatasync(fd) == 0) if (fdatasync(fd) == 0)
return MDBX_SUCCESS; return MDBX_SUCCESS;
} else break /* error */;
#else #if defined(__linux__) || defined(__gnu_linux__)
(void)mode_bits; case MDBX_SYNC_SIZE:
#endif if (mdbx_linux_kernel_version >= 0x03060000)
return MDBX_SUCCESS;
__fallthrough /* fall through */;
#endif /* Linux */
#endif /* _POSIX_SYNCHRONIZED_IO > 0 */
default:
if (fsync(fd) == 0) if (fsync(fd) == 0)
return MDBX_SUCCESS; return MDBX_SUCCESS;
rc = errno; }
} while (rc == EINTR);
int rc = errno;
if (rc != EINTR)
return rc; return rc;
}
#endif #endif
} }
@ -938,24 +944,24 @@ MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread) {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset, MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset,
size_t length, int async) { size_t length,
enum mdbx_syncmode_bits mode_bits) {
uint8_t *ptr = (uint8_t *)map->address + offset; uint8_t *ptr = (uint8_t *)map->address + offset;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if (FlushViewOfFile(ptr, length) && (async || FlushFileBuffers(map->fd))) if (!FlushViewOfFile(ptr, length))
return MDBX_SUCCESS;
return GetLastError(); return GetLastError();
#else #else
#if defined(__linux__) || defined(__gnu_linux__) #if defined(__linux__) || defined(__gnu_linux__)
if (async && mdbx_linux_kernel_version > 0x02061300) if (mode_bits == MDBX_SYNC_NONE && mdbx_linux_kernel_version > 0x02061300)
/* Since Linux 2.6.19, MS_ASYNC is in fact a no-op, /* Since Linux 2.6.19, MS_ASYNC is in fact a no-op. The kernel properly
since the kernel properly tracks dirty pages and flushes them to storage * tracks dirty pages and flushes them to storage as necessary. */
as necessary. */
return MDBX_SUCCESS; return MDBX_SUCCESS;
#endif /* Linux */ #endif /* Linux */
const int mode = async ? MS_ASYNC : MS_SYNC; if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
int rc = (msync(ptr, length, mode) == 0) ? MDBX_SUCCESS : errno; return errno;
return rc; mode_bits &= ~MDBX_SYNC_DATA;
#endif #endif
return mdbx_fsync(map->fd, mode_bits);
} }
MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle, MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle,

View File

@ -572,12 +572,13 @@ mdbx_thread_create(mdbx_thread_t *thread,
MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread); MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread);
enum mdbx_syncmode_bits { enum mdbx_syncmode_bits {
MDBX_SYNC_NONE = 0,
MDBX_SYNC_DATA = 1, MDBX_SYNC_DATA = 1,
MDBX_SYNC_SIZE = 2, MDBX_SYNC_SIZE = 2,
MDBX_SYNC_IODQ = 4 MDBX_SYNC_IODQ = 4
}; };
MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd, MDBX_INTERNAL_FUNC int mdbx_fsync(mdbx_filehandle_t fd,
const enum mdbx_syncmode_bits mode_bits); const enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length); MDBX_INTERNAL_FUNC int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
MDBX_INTERNAL_FUNC int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos); MDBX_INTERNAL_FUNC int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos);
@ -635,7 +636,8 @@ MDBX_INTERNAL_FUNC int
mdbx_resume_threads_after_remap(mdbx_handle_array_t *array); mdbx_resume_threads_after_remap(mdbx_handle_array_t *array);
#endif /* Windows */ #endif /* Windows */
MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset, MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset,
size_t length, int async); size_t length,
enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle, MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle,
const char *pathname, int err); const char *pathname, int err);

View File

@ -298,7 +298,6 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option,
const struct option_verb mode_bits[] = { const struct option_verb mode_bits[] = {
{"rdonly", unsigned(MDBX_RDONLY)}, {"rdonly", unsigned(MDBX_RDONLY)},
{"mapasync", unsigned(MDBX_MAPASYNC)},
{"nosync-utterly", unsigned(MDBX_UTTERLY_NOSYNC)}, {"nosync-utterly", unsigned(MDBX_UTTERLY_NOSYNC)},
{"nosubdir", unsigned(MDBX_NOSUBDIR)}, {"nosubdir", unsigned(MDBX_NOSUBDIR)},
{"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)}, {"nosync-safe", unsigned(MDBX_SAFE_NOSYNC)},

View File

@ -95,7 +95,6 @@ void __noreturn usage(void) {
" coalesce == MDBX_COALESCE\n" " coalesce == MDBX_COALESCE\n"
" nosync-safe == MDBX_SAFE_NOSYNC\n" " nosync-safe == MDBX_SAFE_NOSYNC\n"
" writemap == MDBX_WRITEMAP\n" " writemap == MDBX_WRITEMAP\n"
" mapasync == MDBX_MAPASYNC\n"
" nosync-utterly == MDBX_UTTERLY_NOSYNC\n" " nosync-utterly == MDBX_UTTERLY_NOSYNC\n"
" perturb == MDBX_PAGEPERTURB\n" " perturb == MDBX_PAGEPERTURB\n"
" notls == MDBX_NOTLS\n" " notls == MDBX_NOTLS\n"
@ -125,8 +124,8 @@ void actor_params::set_defaults(const std::string &tmpdir) {
#endif #endif
pathname_db = tmpdir + "mdbx-test.db"; pathname_db = tmpdir + "mdbx-test.db";
mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOMEMINIT | mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_SAFE_NOSYNC |
MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_ACCEDE; MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_ACCEDE;
table_flags = MDBX_DUPSORT; table_flags = MDBX_DUPSORT;
size_lower = -1; size_lower = -1;

View File

@ -77,8 +77,7 @@ bool testcase_nested::teardown() {
void testcase_nested::push_txn() { void testcase_nested::push_txn() {
MDBX_txn *txn; MDBX_txn *txn;
unsigned flags = unsigned flags = prng32() & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC);
prng32() & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC);
int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn); int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
failure_perror("mdbx_txn_begin(nested)", err); failure_perror("mdbx_txn_begin(nested)", err);

View File

@ -109,8 +109,8 @@ static void db_connect() {
env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1)); env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1));
MDBX_CHECK(mdbx_env_set_maxdbs(env, 30)); MDBX_CHECK(mdbx_env_set_maxdbs(env, 30));
MDBX_CHECK(mdbx_env_open(env, opt_db_path, MDBX_CHECK(mdbx_env_open(env, opt_db_path,
MDBX_CREATE | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_CREATE | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC |
MDBX_SAFE_NOSYNC | MDBX_LIFORECLAIM, MDBX_LIFORECLAIM,
0664)); 0664));
MDBX_txn *txn; MDBX_txn *txn;