mirror of
https://github.com/isar/libmdbx.git
synced 2025-02-01 08:58:20 +08:00
mdbx: rework readahead management.
Change-Id: Ic1e2f3f5bd7fb2e9fd8d5820816e3d2def6ec1b3
This commit is contained in:
parent
9c89e7c739
commit
b4a5728455
@ -3406,7 +3406,64 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
__cold static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
/* Turn on/off readahead. It's harmful when the DB is larger than RAM. */
|
||||||
|
static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
|
||||||
|
const size_t length, const bool enable) {
|
||||||
|
assert(length > 0);
|
||||||
|
|
||||||
|
mdbx_notice("readahead %s %u..%u", enable ? "ON" : "OFF",
|
||||||
|
bytes2pgno(env, offset), bytes2pgno(env, offset + length));
|
||||||
|
|
||||||
|
#if defined(F_RDAHEAD)
|
||||||
|
if (unlikely(fcntl(env->me_fd, F_RDAHEAD, enable) == -1))
|
||||||
|
return errno;
|
||||||
|
#endif /* F_RDAHEAD */
|
||||||
|
|
||||||
|
if (enable) {
|
||||||
|
#if defined(F_RDADVISE)
|
||||||
|
struct radvisory hint;
|
||||||
|
hint.ra_offset = offset;
|
||||||
|
hint.ra_count = length;
|
||||||
|
(void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl(
|
||||||
|
env->me_fd, F_RDADVISE, &hint);
|
||||||
|
#endif /* F_RDADVISE */
|
||||||
|
#if defined(MADV_WILLNEED)
|
||||||
|
if (unlikely(madvise(env->me_map + offset, length, MADV_WILLNEED) != 0))
|
||||||
|
return errno;
|
||||||
|
#elif defined(POSIX_MADV_WILLNEED)
|
||||||
|
rc = posix_madvise(env->me_map + offset, length, POSIX_MADV_WILLNEED);
|
||||||
|
if (unlikely(rc != 0))
|
||||||
|
return errno;
|
||||||
|
#elif defined(_WIN32) || defined(_WIN64)
|
||||||
|
if (mdbx_PrefetchVirtualMemory) {
|
||||||
|
WIN32_MEMORY_RANGE_ENTRY hint;
|
||||||
|
hint.VirtualAddress = env->me_map + offset;
|
||||||
|
hint.NumberOfBytes = length;
|
||||||
|
(void)mdbx_PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0);
|
||||||
|
}
|
||||||
|
#elif defined(POSIX_FADV_WILLNEED)
|
||||||
|
int err = posix_fadvise(env->me_fd, offset, length, POSIX_FADV_WILLNEED);
|
||||||
|
if (unlikely(err != 0))
|
||||||
|
return err;
|
||||||
|
#endif /* MADV_WILLNEED */
|
||||||
|
} else {
|
||||||
|
#if defined(MADV_RANDOM)
|
||||||
|
if (unlikely(madvise(env->me_map + offset, length, MADV_RANDOM) != 0))
|
||||||
|
return errno;
|
||||||
|
#elif defined(POSIX_MADV_RANDOM)
|
||||||
|
int err = posix_madvise(env->me_map + offset, length, POSIX_MADV_RANDOM);
|
||||||
|
if (unlikely(err != 0))
|
||||||
|
return err;
|
||||||
|
#elif defined(POSIX_FADV_RANDOM)
|
||||||
|
int err = posix_fadvise(env->me_fd, offset, length, POSIX_FADV_RANDOM);
|
||||||
|
if (unlikely(err != 0))
|
||||||
|
return err;
|
||||||
|
#endif /* MADV_RANDOM */
|
||||||
|
}
|
||||||
|
return MDBX_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
||||||
const pgno_t limit_pgno) {
|
const pgno_t limit_pgno) {
|
||||||
#ifdef MDBX_USE_VALGRIND
|
#ifdef MDBX_USE_VALGRIND
|
||||||
const size_t prev_mapsize = env->me_mapsize;
|
const size_t prev_mapsize = env->me_mapsize;
|
||||||
@ -3489,6 +3546,28 @@ __cold static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
|||||||
}
|
}
|
||||||
|
|
||||||
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
|
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
|
||||||
|
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
|
||||||
|
const size_t readahead_offset =
|
||||||
|
(limit_bytes != env->me_dbgeo.upper
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
|| env->me_dbgeo.now > size_bytes
|
||||||
|
#endif /* Windows */
|
||||||
|
)
|
||||||
|
? 0 /* reassign readahead to the entire map
|
||||||
|
because it (likely) was remapped */
|
||||||
|
: env->me_dbgeo.now;
|
||||||
|
rc = mdbx_is_readahead_reasonable(size_bytes, 0);
|
||||||
|
if (rc == MDBX_RESULT_FALSE)
|
||||||
|
rc = mdbx_set_readahead(env, 0, size_bytes, false);
|
||||||
|
else if (rc == MDBX_RESULT_TRUE) {
|
||||||
|
rc = MDBX_SUCCESS;
|
||||||
|
if (size_bytes > readahead_offset) {
|
||||||
|
*env->me_discarded_tail = size_pgno;
|
||||||
|
rc = mdbx_set_readahead(env, readahead_offset,
|
||||||
|
size_bytes - readahead_offset, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bailout:
|
bailout:
|
||||||
if (rc == MDBX_SUCCESS) {
|
if (rc == MDBX_SUCCESS) {
|
||||||
@ -7347,63 +7426,6 @@ bailout:
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Map the DB file into memory and apply the initial memory advice.
 *
 * env           environment; the mapping is created through mdbx_mmap()
 *               using me_dbgeo.now as the size and me_dbgeo.upper as the
 *               limit.
 * is_exclusive  non-zero enables MADV_REMOVE for the unused tail (only
 *               together with MDBX_WRITEMAP).
 * usedsize      number of bytes of the DB that are actually in use.
 *
 * Returns MDBX_SUCCESS, the error from mdbx_mmap(), or errno when the
 * MADV_DONTFORK advice fails. All other hints are best-effort. */
static int __cold mdbx_env_map(MDBX_env *env, const int is_exclusive,
                               const size_t usedsize) {
  int rc = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
                     env->me_dbgeo.upper);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

#ifdef MADV_DONTFORK
  /* The only advice whose failure is reported to the caller. */
  if (unlikely(madvise(env->me_map, env->me_mapsize, MADV_DONTFORK) != 0))
    return errno;
#endif

#ifdef MADV_NOHUGEPAGE
  (void)madvise(env->me_map, env->me_mapsize, MADV_NOHUGEPAGE);
#endif

#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
  /* Meta pages are always dumpable; the rest of the map only when the
   * MDBX_DBG_DUMP runtime flag is set. */
  const size_t meta_length = pgno2bytes(env, NUM_METAS);
  (void)madvise(env->me_map, meta_length, MADV_DODUMP);
  (void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length,
                (mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP
                                                     : MADV_DONTDUMP);
#endif

  /* Record where the unused tail begins: the used size rounded up to the
   * OS page size, expressed as a page number. */
  const size_t used_aligned2os_bytes =
      roundup_powerof2(usedsize, env->me_os_psize);
  *env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
  if (used_aligned2os_bytes < env->me_dbgeo.now) {
#if defined(MADV_REMOVE)
    if (is_exclusive && (env->me_flags & MDBX_WRITEMAP) != 0)
      (void)madvise(env->me_map + used_aligned2os_bytes,
                    env->me_dbgeo.now - used_aligned2os_bytes, MADV_REMOVE);
#else
    (void)is_exclusive;
#endif /* MADV_REMOVE */
#if defined(MADV_DONTNEED)
    (void)madvise(env->me_map + used_aligned2os_bytes,
                  env->me_dbgeo.now - used_aligned2os_bytes, MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED)
    /* POSIX_MADV_* constants belong to posix_madvise(), not madvise(). */
    (void)posix_madvise(env->me_map + used_aligned2os_bytes,
                        env->me_dbgeo.now - used_aligned2os_bytes,
                        POSIX_MADV_DONTNEED);
#elif defined(POSIX_FADV_DONTNEED)
    (void)posix_fadvise(env->me_fd, used_aligned2os_bytes,
                        env->me_dbgeo.now - used_aligned2os_bytes,
                        POSIX_FADV_DONTNEED);
#endif /* MADV_DONTNEED */
  }

#ifdef MDBX_USE_VALGRIND
  env->me_valgrind_handle =
      VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
#endif

  return MDBX_SUCCESS;
}
|
|
||||||
|
|
||||||
__cold LIBMDBX_API int
|
__cold LIBMDBX_API int
|
||||||
mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
||||||
intptr_t size_upper, intptr_t growth_step,
|
intptr_t size_upper, intptr_t growth_step,
|
||||||
@ -7595,15 +7617,6 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
|||||||
if (bytes2pgno(env, shrink_threshold) > UINT16_MAX)
|
if (bytes2pgno(env, shrink_threshold) > UINT16_MAX)
|
||||||
shrink_threshold = pgno2bytes(env, UINT16_MAX);
|
shrink_threshold = pgno2bytes(env, UINT16_MAX);
|
||||||
|
|
||||||
#ifdef POSIX_FADV_RANDOM
|
|
||||||
if (env->me_fd != INVALID_HANDLE_VALUE) {
|
|
||||||
/* this also checks that the file size is valid for a particular FS */
|
|
||||||
rc = posix_fadvise(env->me_fd, 0, env->me_dbgeo.upper, POSIX_FADV_RANDOM);
|
|
||||||
if (unlikely(rc != 0))
|
|
||||||
goto bailout;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* save user's geo-params for future open/create */
|
/* save user's geo-params for future open/create */
|
||||||
env->me_dbgeo.lower = size_lower;
|
env->me_dbgeo.lower = size_lower;
|
||||||
env->me_dbgeo.now = size_now;
|
env->me_dbgeo.now = size_now;
|
||||||
@ -7929,7 +7942,56 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err = mdbx_env_map(env, lck_rc /* exclusive status */, used_bytes);
|
err = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
|
||||||
|
env->me_dbgeo.upper);
|
||||||
|
if (unlikely(err != MDBX_SUCCESS))
|
||||||
|
return err;
|
||||||
|
|
||||||
|
#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
|
||||||
|
const size_t meta_length = pgno2bytes(env, NUM_METAS);
|
||||||
|
(void)madvise(env->me_map, meta_length, MADV_DODUMP);
|
||||||
|
(void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length,
|
||||||
|
(mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP
|
||||||
|
: MADV_DONTDUMP);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const size_t used_aligned2os_bytes =
|
||||||
|
roundup_powerof2(used_bytes, env->me_os_psize);
|
||||||
|
*env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
|
||||||
|
if (used_aligned2os_bytes < env->me_dbgeo.now) {
|
||||||
|
#if defined(MADV_REMOVE)
|
||||||
|
if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0) {
|
||||||
|
mdbx_notice("open-MADV_%s %u..%u", "REMOVE", *env->me_discarded_tail,
|
||||||
|
bytes2pgno(env, env->me_dbgeo.now));
|
||||||
|
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||||
|
env->me_dbgeo.now - used_aligned2os_bytes, MADV_REMOVE);
|
||||||
|
}
|
||||||
|
#endif /* MADV_REMOVE */
|
||||||
|
#if defined(MADV_DONTNEED)
|
||||||
|
mdbx_notice("open-MADV_%s %u..%u", "DONTNEED", *env->me_discarded_tail,
|
||||||
|
bytes2pgno(env, env->me_dbgeo.now));
|
||||||
|
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||||
|
env->me_dbgeo.now - used_aligned2os_bytes, MADV_DONTNEED);
|
||||||
|
#elif defined(POSIX_MADV_DONTNEED)
|
||||||
|
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||||
|
env->me_dbgeo.now - used_aligned2os_bytes,
|
||||||
|
POSIX_MADV_DONTNEED);
|
||||||
|
#elif defined(POSIX_FADV_DONTNEED)
|
||||||
|
(void)posix_fadvise(env->me_fd, used_aligned2os_bytes,
|
||||||
|
env->me_dbgeo.now - used_aligned2os_bytes,
|
||||||
|
POSIX_FADV_DONTNEED);
|
||||||
|
#endif /* MADV_DONTNEED */
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef MDBX_USE_VALGRIND
|
||||||
|
env->me_valgrind_handle =
|
||||||
|
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const bool readahead =
|
||||||
|
(env->me_flags & MDBX_NORDAHEAD) == 0 &&
|
||||||
|
mdbx_is_readahead_reasonable(env->me_dbgeo.now, 0) == MDBX_RESULT_TRUE;
|
||||||
|
err = mdbx_set_readahead(env, 0, env->me_dbgeo.now, readahead);
|
||||||
if (err != MDBX_SUCCESS)
|
if (err != MDBX_SUCCESS)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
@ -8229,21 +8291,11 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
|||||||
(void)madvise(env->me_lck, size, MADV_DODUMP);
|
(void)madvise(env->me_lck, size, MADV_DODUMP);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MADV_DONTFORK
|
|
||||||
if (madvise(env->me_lck, size, MADV_DONTFORK) < 0)
|
|
||||||
goto bailout;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef MADV_WILLNEED
|
#ifdef MADV_WILLNEED
|
||||||
if (madvise(env->me_lck, size, MADV_WILLNEED) < 0)
|
if (madvise(env->me_lck, size, MADV_WILLNEED) < 0)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MADV_RANDOM
|
|
||||||
if (madvise(env->me_lck, size, MADV_RANDOM) < 0)
|
|
||||||
goto bailout;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (lck_seize_rc == MDBX_RESULT_TRUE) {
|
if (lck_seize_rc == MDBX_RESULT_TRUE) {
|
||||||
/* LY: exclusive mode, reset lck */
|
/* LY: exclusive mode, reset lck */
|
||||||
memset(env->me_lck, 0, (size_t)size);
|
memset(env->me_lck, 0, (size_t)size);
|
||||||
|
@ -1089,13 +1089,24 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
|
|||||||
map->address = mmap(
|
map->address = mmap(
|
||||||
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
||||||
MAP_SHARED, map->fd, 0);
|
MAP_SHARED, map->fd, 0);
|
||||||
if (likely(map->address != MAP_FAILED)) {
|
|
||||||
map->length = limit;
|
if (unlikely(map->address == MAP_FAILED)) {
|
||||||
return MDBX_SUCCESS;
|
map->length = 0;
|
||||||
|
map->address = nullptr;
|
||||||
|
return errno;
|
||||||
}
|
}
|
||||||
map->length = 0;
|
map->length = limit;
|
||||||
map->address = nullptr;
|
|
||||||
return errno;
|
#ifdef MADV_DONTFORK
|
||||||
|
if (unlikely(madvise(map->address, map->length, MADV_DONTFORK) != 0))
|
||||||
|
return errno;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MADV_NOHUGEPAGE
|
||||||
|
(void)madvise(map->address, map->length, MADV_NOHUGEPAGE);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return MDBX_SUCCESS;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,8 +32,8 @@ void actor_params::set_defaults(const std::string &tmpdir) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
pathname_db = tmpdir + "mdbx-test.db";
|
pathname_db = tmpdir + "mdbx-test.db";
|
||||||
mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NORDAHEAD |
|
mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOMEMINIT |
|
||||||
MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_LIFORECLAIM;
|
MDBX_COALESCE | MDBX_LIFORECLAIM;
|
||||||
table_flags = MDBX_DUPSORT;
|
table_flags = MDBX_DUPSORT;
|
||||||
|
|
||||||
size_lower = -1;
|
size_lower = -1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user