From 2b0bfb9eeaddf7abf344aaf210b81fcfefa545b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Oct 2025 11:41:38 +0300 Subject: [PATCH] mdbx: revert/drop `MDBX_DBG_NOFALLOC_INCORE` and introduce `osal_fsetsize()`. This fixes a regression introduced by 2a7f460345edbeb26a51782cbe6af3c55254ae77, where a DXB file remains longer than necessary on Mac or Linux when building without `_GNU_SOURCE`. --- mdbx.h | 6 +--- src/api-copy.c | 4 +-- src/bits.md | 2 +- src/cogs.h | 4 --- src/dxb.c | 10 ++---- src/lck-windows.c | 2 +- src/options.h | 13 ++++++++ src/osal.c | 80 ++++++++++++++++++++++++++++------------------- src/osal.h | 6 +--- 9 files changed, 69 insertions(+), 58 deletions(-) diff --git a/mdbx.h b/mdbx.h index 9076f273..c679d47d 100644 --- a/mdbx.h +++ b/mdbx.h @@ -929,12 +929,8 @@ typedef enum MDBX_debug_flags { * \note Nonetheless a new write transactions will use and store the last signature regardless this flag */ MDBX_DBG_DONT_UPGRADE = 64, - /** Disables the use of fallocate() for an in-core database(s) to avoid sporadic test failures - * due to lack of space in tmpfs and/or free memory. 
*/ - MDBX_DBG_NOFALLOC_INCORE = 128, - #ifdef ENABLE_UBSAN - MDBX_DBG_MAX = ((unsigned)MDBX_LOG_MAX) << 16 | 255 /* avoid UBSAN false-positive trap by a tests */, + MDBX_DBG_MAX = ((unsigned)MDBX_LOG_MAX) << 16 | 127 /* avoid UBSAN false-positive trap by a tests */, #endif /* ENABLE_UBSAN */ /** for mdbx_setup_debug() only: Don't change current settings */ diff --git a/src/api-copy.c b/src/api-copy.c index 06100b92..347e22cc 100644 --- a/src/api-copy.c +++ b/src/api-copy.c @@ -479,7 +479,7 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_fileha if (meta->geometry.now != meta->geometry.first_unallocated) { const size_t whole_size = pgno2bytes(env, meta->geometry.now); if (!dest_is_pipe) - return osal_fallocate(fd, whole_size); + return osal_fsetsize(fd, whole_size); const size_t used_size = pgno2bytes(env, meta->geometry.first_unallocated); memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); @@ -648,7 +648,7 @@ retry_snap_meta: /* Extend file if required */ if (likely(rc == MDBX_SUCCESS) && whole_size != used_size) { if (!dest_is_pipe) - rc = osal_fallocate(fd, whole_size); + rc = osal_fsetsize(fd, whole_size); else { memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF); for (size_t offset = used_size; rc == MDBX_SUCCESS && offset < whole_size;) { diff --git a/src/bits.md b/src/bits.md index fdb33945..96fae211 100644 --- a/src/bits.md +++ b/src/bits.md @@ -10,7 +10,7 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 7 |0000 0080| |TXN_DRAINED_GC|DB_VALID |ALLDUPS |DBI_LINDO | | | | 8 |0000 0100| _MAY_MOVE |TXN_CURSORS | | | | | | <= | 9 |0000 0200| _MAY_UNMAP| | | | | | | <= | -10|0000 0400| _NO_FALLOC| | | | | | | <= | +10|0000 0400| | | | | | | | | 11|0000 0800| | | | | | | | | 12|0000 1000| | | | | | | | | 13|0000 2000|VALIDATION | | | | | |P_SPILLED | | diff --git a/src/cogs.h b/src/cogs.h index d229f10a..bd8c98f5 100644 --- a/src/cogs.h +++ b/src/cogs.h @@ -522,10 +522,6 @@ MDBX_INTERNAL void munlock_after(const MDBX_env *env, const 
pgno_t aligned_pgno, MDBX_INTERNAL void munlock_all(const MDBX_env *env); -static inline bool fallocate_disabled(const MDBX_env *env) { - return env->incore && (globals.runtime_flags & MDBX_DBG_NOFALLOC_INCORE) != 0; -} - /*----------------------------------------------------------------------------*/ /* Cache coherence and mmap invalidation */ #ifndef MDBX_CPU_WRITEBACK_INCOHERENT diff --git a/src/dxb.c b/src/dxb.c index 48e815cc..42e495f1 100644 --- a/src/dxb.c +++ b/src/dxb.c @@ -154,8 +154,6 @@ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const pgno_t unsigned mresize_flags = env->flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC); if (mode >= impilict_shrink) mresize_flags |= txn_shrink_allowed; - if (fallocate_disabled(env)) - mresize_flags |= MDBX_MRESIZE_NO_FALLOC; if (limit_bytes == env->dxb_mmap.limit && size_bytes == env->dxb_mmap.current && size_bytes == env->dxb_mmap.filesize) goto bailout; @@ -534,8 +532,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit if (unlikely(err != MDBX_SUCCESS)) return err; - err = (fallocate_disabled(env) ? osal_ftruncate : osal_fallocate)( - env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now); + err = osal_fsetsize(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -684,10 +681,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, env->geo_in_bytes.upper, - (lck_rc && env->stuck_meta < 0) - ? (fallocate_disabled(env) ? MMAP_OPTION_SETLENGTH | MMAP_OPTION_NOFALLOC : MMAP_OPTION_SETLENGTH) - : 0, - env->pathname.dxb); + (lck_rc && env->stuck_meta < 0) ? 
MMAP_OPTION_SETLENGTH : 0, env->pathname.dxb); if (unlikely(err != MDBX_SUCCESS)) return err; diff --git a/src/lck-windows.c b/src/lck-windows.c index 06feb346..f1499a32 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -547,7 +547,7 @@ int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t curr if (synced && !inprocess_neighbor && env->lck_mmap.fd != INVALID_HANDLE_VALUE && lck_upgrade(env, true) == MDBX_SUCCESS) /* this will fail if LCK is used/mmapped by other process(es) */ - osal_ftruncate(env->lck_mmap.fd, 0); + osal_fsetsize(env->lck_mmap.fd, 0); } lck_unlock(env); return MDBX_SUCCESS; diff --git a/src/options.h b/src/options.h index fa570558..e3e0dafd 100644 --- a/src/options.h +++ b/src/options.h @@ -358,6 +358,19 @@ #error MDBX_USE_COPYFILERANGE must be defined as 0 or 1 #endif /* MDBX_USE_COPYFILERANGE */ +/** Advanced: Using posix_fallocate() or fcntl(F_PREALLOCATE) (autodetection by default). */ +#ifndef MDBX_USE_FALLOCATE +#if defined(__APPLE__) +#define MDBX_USE_FALLOCATE 0 /* Too slow and unclean, but not required to prevent SIGBUS */ +#elif (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || (__GLIBC_PREREQ(2, 10) && defined(_GNU_SOURCE)) +#define MDBX_USE_FALLOCATE 1 +#else +#define MDBX_USE_FALLOCATE 0 +#endif +#elif !(MDBX_USE_FALLOCATE == 0 || MDBX_USE_FALLOCATE == 1) +#error MDBX_USE_FALLOCATE must be defined as 0 or 1 +#endif /* MDBX_USE_FALLOCATE */ + //------------------------------------------------------------------------------ #ifndef MDBX_CPU_WRITEBACK_INCOHERENT diff --git a/src/osal.c b/src/osal.c index 665e6fcb..49e9743c 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1594,8 +1594,7 @@ int osal_is_pipe(mdbx_filehandle_t fd) { #endif } -/* truncate file: just set the length of a file */ -int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) { +int osal_fsetsize(mdbx_filehandle_t fd, const uint64_t length) { #if defined(_WIN32) || defined(_WIN64) if (imports.SetFileInformationByHandle) { 
FILE_END_OF_FILE_INFO EndOfFileInfo; @@ -1610,31 +1609,48 @@ int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) { } #else STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems"); - return ftruncate(fd, length) == 0 ? MDBX_SUCCESS : errno; -#endif -} -/* extend file: set the length of a file AND ensure the space has been allocated */ -int osal_fallocate(mdbx_filehandle_t fd, uint64_t length) { - assert(length > 0); - int err = MDBX_RESULT_TRUE; -#if (defined(__linux__) || defined(__gnu_linux__)) && \ - ((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 10)) || (defined(__ANDROID_API__) && __ANDROID_API__ >= 21)) - err = fallocate(fd, 0, 0, length) ? ignore_enosys_and_eremote(errno) : MDBX_SUCCESS; -#elif defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L && !defined(__APPLE__) - err = posix_fallocate(fd, 0, length) ? ignore_enosys_and_eremote(errno) : MDBX_SUCCESS; -#elif defined(__APPLE__) - fstore_t store = {F_ALLOCATEALL, F_PEOFPOSMODE, 0, length, 0}; - if (fcntl(fd, F_PREALLOCATE, &store)) - err = ignore_enosys_and_eremote(errno); -#endif /* Apple */ -#if !defined(_WIN32) && !defined(_WIN64) - /* Workaround for testing: ignore ENOSPC for TMPFS/RAMFS. - * This is insignificant for production, but it helps in some tests using /dev/shm inside docker/containers. 
*/ - if (err == ENOSPC && osal_check_fs_incore(fd) == MDBX_RESULT_TRUE) - err = MDBX_RESULT_TRUE; +#if MDBX_USE_FALLOCATE + struct stat info; + if (unlikely(fstat(fd, &info))) + return errno; + + const uint64_t allocated = UINT64_C(512) * info.st_blocks; + if (length > allocated) { +#if defined(__APPLE__) + fstore_t store = { + .fst_flags = F_ALLOCATECONTIG, .fst_posmode = F_PEOFPOSMODE, .fst_offset = 0, .fst_length = length}; + int err = MDBX_SUCCESS; + if (fcntl(fd, F_PREALLOCATE, &store)) { + /* TODO: implement step-by-step allocation in chunks of 16384, 8192, 4096, 2048, 1024 Kb */ + store.fst_flags = F_ALLOCATEALL; + if (fcntl(fd, F_PREALLOCATE, &store)) + err = errno; + } +#elif defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L + const int err = posix_fallocate(fd, 0, length); + if (!err && length > (uint64_t)info.st_size) + info.st_size = length /* posix_fallocate() extends the file */; +#else + const int err = fallocate(fd, 0, 0, length) ? errno : MDBX_SUCCESS; + if (!err && length > (uint64_t)info.st_size) + info.st_size = length /* fallocate() extends the file */; +#endif + if (unlikely(err) && ignore_enosys_and_eremote(err) != MDBX_RESULT_TRUE) { + /* Workaround for testing: ignore ENOSPC for TMPFS/RAMFS. + * This is insignificant for production, but it helps in some tests using /dev/shm inside docker/containers. */ + if (err != ENOSPC || osal_check_fs_incore(fd) != MDBX_RESULT_TRUE) + return err; + } + } + + if (length == (uint64_t)info.st_size) + return MDBX_SUCCESS; +#endif + + return unlikely(ftruncate(fd, length)) ? errno : MDBX_SUCCESS; + #endif /* !Windows */ - return (err == MDBX_RESULT_TRUE) ? osal_ftruncate(fd, length) : err; } int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) { @@ -2087,8 +2103,8 @@ int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit return err; if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_SETLENGTH) != 0) { - err = ((options & MMAP_OPTION_NOFALLOC) ? 
osal_ftruncate : osal_fallocate)(map->fd, size); - VERBOSE("ftruncate %zu, err %d", size, err); + err = osal_fsetsize(map->fd, size); + VERBOSE("osal_fsetsize %zu, err %d", size, err); if (err != MDBX_SUCCESS) return err; map->filesize = size; @@ -2333,7 +2349,7 @@ retry_file_and_section: } if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) { - err = ((flags & MDBX_MRESIZE_NO_FALLOC) ? osal_ftruncate : osal_fallocate)(map->fd, size); + err = osal_fsetsize(map->fd, size); if (err == MDBX_SUCCESS) map->filesize = size; /* ignore error, because Windows unable shrink file @@ -2413,11 +2429,11 @@ retry_mapview:; } else { if (map->filesize != size) { if (size > map->filesize) { - rc = ((flags & MDBX_MRESIZE_NO_FALLOC) ? osal_ftruncate : osal_fallocate)(map->fd, size); - VERBOSE("f%s-%s %zu, err %d", "allocate", "extend", size, rc); + rc = osal_fsetsize(map->fd, size); + VERBOSE("osal_fsetsize-%s %zu, err %d", "extend", size, rc); } else if (flags & txn_shrink_allowed) { - rc = osal_ftruncate(map->fd, size); - VERBOSE("f%s-%s %zu, err %d", "truncate", "shrink", size, rc); + rc = osal_fsetsize(map->fd, size); + VERBOSE("osal_fsetsize-%s %zu, err %d", "shrink", size, rc); } if (unlikely(rc != MDBX_SUCCESS)) return rc; diff --git a/src/osal.h b/src/osal.h index f440603d..3cfab476 100644 --- a/src/osal.h +++ b/src/osal.h @@ -441,8 +441,7 @@ enum osal_syncmode_bits { }; MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, const enum osal_syncmode_bits mode_bits); -MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length); -MDBX_INTERNAL int osal_fallocate(mdbx_filehandle_t fd, uint64_t length); +MDBX_INTERNAL int osal_fsetsize(mdbx_filehandle_t fd, const uint64_t length); MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos); MDBX_INTERNAL int osal_filesize(mdbx_filehandle_t fd, uint64_t *length); @@ -481,14 +480,11 @@ MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait); #define MMAP_OPTION_SETLENGTH 1 #define MMAP_OPTION_SEMAPHORE 2 
-#define MMAP_OPTION_NOFALLOC 4 MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit, const unsigned options, const pathchar_t *pathname4logging); MDBX_INTERNAL int osal_munmap(osal_mmap_t *map); - #define MDBX_MRESIZE_MAY_MOVE 0x00000100 #define MDBX_MRESIZE_MAY_UNMAP 0x00000200 -#define MDBX_MRESIZE_NO_FALLOC 0x00000400 MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, size_t limit); #if defined(_WIN32) || defined(_WIN64) typedef struct {