mirror of
https://github.com/isar/libmdbx.git
synced 2025-08-25 21:54:28 +08:00
mdbx: fix unexpected SIGBUS when there
is not enough space in a filesystem.
On modern Linux, the allocation of space for a file can be deferred and/or lazy, rather than happening when its length is set using `ftruncate()`. The actual allocation of space occurs when the corresponding areas of the file are written to, or when they are read (in this case, the file system fills these areas with zeros). The specific behavior depends on the type of file system and the kernel version, but the main point is that currently, when the file size is set, only the momentary ability to allocate space may be checked, without any reservation. If the file system is running out of space, an `ENOSPC` error may occur while the OS kernel is processing a page fault caused by accessing one of the added pages after the database has been enlarged. In this case, the OS kernel has no alternative but to send a `SIGBUS` signal to the process. This commit fixes the problem by using system calls that explicitly allocate space for a given file size. Related-to https://github.com/erigontech/erigon/issues/16709 This is a simple improvement; however, it is complicated by the need to take into account the availability of the appropriate system API and to handle non-fatal errors from file systems that do not support the appropriate operations. Therefore, there is a risk of regressions in unusual/rare situations, including when databases are hosted on network media.
This commit is contained in:
@@ -479,7 +479,7 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_fileha
|
||||
if (meta->geometry.now != meta->geometry.first_unallocated) {
|
||||
const size_t whole_size = pgno2bytes(env, meta->geometry.now);
|
||||
if (!dest_is_pipe)
|
||||
return osal_ftruncate(fd, whole_size);
|
||||
return osal_fallocate(fd, whole_size);
|
||||
|
||||
const size_t used_size = pgno2bytes(env, meta->geometry.first_unallocated);
|
||||
memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF);
|
||||
@@ -648,7 +648,7 @@ retry_snap_meta:
|
||||
/* Extend file if required */
|
||||
if (likely(rc == MDBX_SUCCESS) && whole_size != used_size) {
|
||||
if (!dest_is_pipe)
|
||||
rc = osal_ftruncate(fd, whole_size);
|
||||
rc = osal_fallocate(fd, whole_size);
|
||||
else {
|
||||
memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF);
|
||||
for (size_t offset = used_size; rc == MDBX_SUCCESS && offset < whole_size;) {
|
||||
|
@@ -532,7 +532,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
err = osal_ftruncate(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now);
|
||||
err = osal_fallocate(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
@@ -682,7 +682,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit
|
||||
!(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE;
|
||||
|
||||
err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, env->geo_in_bytes.upper,
|
||||
(lck_rc && env->stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0, env->pathname.dxb);
|
||||
(lck_rc && env->stuck_meta < 0) ? MMAP_OPTION_SETLENGTH : 0, env->pathname.dxb);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
|
@@ -62,9 +62,9 @@ __cold static int lck_setup_locked(MDBX_env *env) {
|
||||
}
|
||||
env->max_readers = (maxreaders <= MDBX_READERS_LIMIT) ? (unsigned)maxreaders : (unsigned)MDBX_READERS_LIMIT;
|
||||
|
||||
err =
|
||||
osal_mmap((env->flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, &env->lck_mmap, (size_t)size, (size_t)size,
|
||||
lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE : MMAP_OPTION_SEMAPHORE, env->pathname.lck);
|
||||
err = osal_mmap((env->flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, &env->lck_mmap, (size_t)size, (size_t)size,
|
||||
lck_seize_rc ? MMAP_OPTION_SETLENGTH | MMAP_OPTION_SEMAPHORE : MMAP_OPTION_SEMAPHORE,
|
||||
env->pathname.lck);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
|
35
src/osal.c
35
src/osal.c
@@ -1594,6 +1594,7 @@ MDBX_INTERNAL int osal_is_pipe(mdbx_filehandle_t fd) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/* truncate file: just set the length of a file */
|
||||
MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
if (imports.SetFileInformationByHandle) {
|
||||
@@ -1613,6 +1614,23 @@ MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/* extend file: set the length of a file AND ensure the space has been allocated */
|
||||
MDBX_INTERNAL int osal_fallocate(mdbx_filehandle_t fd, uint64_t length) {
|
||||
assert(length > 0);
|
||||
int err = MDBX_RESULT_TRUE;
|
||||
#if (defined(__linux__) || defined(__gnu_linux__)) && \
|
||||
((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 10)) || (defined(__ANDROID_API__) && __ANDROID_API__ >= 21))
|
||||
err = fallocate(fd, 0, 0, length) ? ignore_enosys_and_eremote(errno) : MDBX_SUCCESS;
|
||||
#elif defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L && !defined(__APPLE__)
|
||||
err = posix_fallocate(fd, 0, length) ? ignore_enosys_and_eremote(errno) : MDBX_SUCCESS;
|
||||
#elif defined(__APPLE__)
|
||||
fstore_t store = {F_ALLOCATEALL, F_PEOFPOSMODE, 0, length, 0};
|
||||
if (fcntl(fd, F_PREALLOCATE, &store))
|
||||
err = ignore_enosys_and_eremote(errno);
|
||||
#endif /* Apple */
|
||||
return (err == MDBX_RESULT_TRUE) ? osal_ftruncate(fd, length) : err;
|
||||
}
|
||||
|
||||
MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
LARGE_INTEGER li;
|
||||
@@ -2063,8 +2081,8 @@ MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, cons
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_TRUNCATE) != 0) {
|
||||
err = osal_ftruncate(map->fd, size);
|
||||
if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_SETLENGTH) != 0) {
|
||||
err = osal_fallocate(map->fd, size);
|
||||
VERBOSE("ftruncate %zu, err %d", size, err);
|
||||
if (err != MDBX_SUCCESS)
|
||||
return err;
|
||||
@@ -2310,7 +2328,7 @@ retry_file_and_section:
|
||||
}
|
||||
|
||||
if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) {
|
||||
err = osal_ftruncate(map->fd, size);
|
||||
err = osal_fallocate(map->fd, size);
|
||||
if (err == MDBX_SUCCESS)
|
||||
map->filesize = size;
|
||||
/* ignore error, because Windows unable shrink file
|
||||
@@ -2388,10 +2406,15 @@ retry_mapview:;
|
||||
rc = MDBX_EPERM;
|
||||
map->current = (map->filesize > limit) ? limit : (size_t)map->filesize;
|
||||
} else {
|
||||
if (size > map->filesize || (size < map->filesize && (flags & txn_shrink_allowed))) {
|
||||
if (map->filesize != size) {
|
||||
if (size > map->filesize) {
|
||||
rc = osal_fallocate(map->fd, size);
|
||||
VERBOSE("f%s-%s %zu, err %d", "allocate", "extend", size, rc);
|
||||
} else if (flags & txn_shrink_allowed) {
|
||||
rc = osal_ftruncate(map->fd, size);
|
||||
VERBOSE("ftruncate %zu, err %d", size, rc);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
VERBOSE("f%s-%s %zu, err %d", "truncate", "shrink", size, rc);
|
||||
}
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
map->filesize = size;
|
||||
}
|
||||
|
@@ -434,6 +434,7 @@ enum osal_syncmode_bits {
|
||||
|
||||
MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, const enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length);
|
||||
MDBX_INTERNAL int osal_fallocate(mdbx_filehandle_t fd, uint64_t length);
|
||||
MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
@@ -470,7 +471,7 @@ MDBX_INTERNAL int osal_removedirectory(const pathchar_t *pathname);
|
||||
MDBX_INTERNAL int osal_is_pipe(mdbx_filehandle_t fd);
|
||||
MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait);
|
||||
|
||||
#define MMAP_OPTION_TRUNCATE 1
|
||||
#define MMAP_OPTION_SETLENGTH 1
|
||||
#define MMAP_OPTION_SEMAPHORE 2
|
||||
MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit, const unsigned options,
|
||||
const pathchar_t *pathname4logging);
|
||||
|
Reference in New Issue
Block a user