mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:24:12 +08:00
mdbx: refactor me_dbgeo usage and osal-mmap/mresize().
Change-Id: I1f29c953abcbd4f2bab7ba52e7dd9da85ea48354
This commit is contained in:
parent
b4729bd1d6
commit
415cb5f886
@ -3411,7 +3411,6 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
|
||||
static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
|
||||
const size_t length, const bool enable) {
|
||||
assert(length > 0);
|
||||
|
||||
mdbx_notice("readahead %s %u..%u", enable ? "ON" : "OFF",
|
||||
bytes2pgno(env, offset), bytes2pgno(env, offset + length));
|
||||
|
||||
@ -3466,18 +3465,14 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
|
||||
|
||||
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
||||
const pgno_t limit_pgno) {
|
||||
#ifdef MDBX_USE_VALGRIND
|
||||
const size_t prev_mapsize = env->me_mapsize;
|
||||
void *const prev_mapaddr = env->me_map;
|
||||
#endif
|
||||
|
||||
const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
|
||||
const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
|
||||
|
||||
mdbx_verbose("resize datafile/mapping: "
|
||||
"present %" PRIuPTR " -> %" PRIuPTR ", "
|
||||
"limit %" PRIuPTR " -> %" PRIuPTR,
|
||||
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper, limit_bytes);
|
||||
env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
|
||||
limit_bytes);
|
||||
|
||||
mdbx_assert(env, limit_bytes >= size_bytes);
|
||||
mdbx_assert(env, bytes2pgno(env, size_bytes) >= size_pgno);
|
||||
@ -3491,9 +3486,9 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
||||
mdbx_handle_array_t *suspended = NULL;
|
||||
mdbx_handle_array_t array_onstack;
|
||||
int rc = MDBX_SUCCESS;
|
||||
if (limit_bytes == env->me_dxb_mmap.length &&
|
||||
if (limit_bytes == env->me_dxb_mmap.limit &&
|
||||
size_bytes == env->me_dxb_mmap.current &&
|
||||
env->me_dxb_mmap.current == env->me_dxb_mmap.filesize)
|
||||
size_bytes == env->me_dxb_mmap.filesize)
|
||||
goto bailout;
|
||||
|
||||
/* 1) Windows allows only extending a read-write section, but not a
|
||||
@ -3517,27 +3512,31 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
||||
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
return rc;
|
||||
if (limit_bytes == env->me_dxb_mmap.length && size_bytes == env->me_dbgeo.now)
|
||||
if (limit_bytes == env->me_dxb_mmap.limit &&
|
||||
size_bytes == env->me_dxb_mmap.current)
|
||||
goto bailout;
|
||||
#endif /* Windows */
|
||||
|
||||
if (size_bytes < env->me_dbgeo.now) {
|
||||
const size_t prev_limit = env->me_dxb_mmap.limit;
|
||||
const void *const prev_addr = env->me_map;
|
||||
const size_t prev_size = env->me_dxb_mmap.current;
|
||||
if (size_bytes < prev_size) {
|
||||
mdbx_notice("resize-MADV_%s %u..%u",
|
||||
(env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED",
|
||||
size_pgno, bytes2pgno(env, env->me_dbgeo.now));
|
||||
size_pgno, bytes2pgno(env, prev_size));
|
||||
#if defined(MADV_REMOVE)
|
||||
if ((env->me_flags & MDBX_WRITEMAP) == 0 ||
|
||||
madvise(env->me_map + size_bytes, env->me_dbgeo.now - size_bytes,
|
||||
madvise(env->me_map + size_bytes, prev_size - size_bytes,
|
||||
MADV_REMOVE) != 0)
|
||||
#endif
|
||||
#if defined(MADV_DONTNEED)
|
||||
(void)madvise(env->me_map + size_bytes, env->me_dbgeo.now - size_bytes,
|
||||
(void)madvise(env->me_map + size_bytes, prev_size - size_bytes,
|
||||
MADV_DONTNEED);
|
||||
#elif defined(POSIX_MADV_DONTNEED)
|
||||
(void)posix_madvise(env->me_map + size_bytes,
|
||||
env->me_dbgeo.now - size_bytes, POSIX_MADV_DONTNEED);
|
||||
(void)posix_madvise(env->me_map + size_bytes, prev_size - size_bytes,
|
||||
POSIX_MADV_DONTNEED);
|
||||
#elif defined(POSIX_FADV_DONTNEED)
|
||||
(void)posix_fadvise(env->me_fd, size_bytes, env->me_dbgeo.now - size_bytes,
|
||||
(void)posix_fadvise(env->me_fd, size_bytes, prev_size - size_bytes,
|
||||
POSIX_FADV_DONTNEED);
|
||||
#else
|
||||
__noop();
|
||||
@ -3548,24 +3547,25 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
|
||||
|
||||
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
|
||||
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
|
||||
const size_t readahead_offset =
|
||||
(limit_bytes != env->me_dbgeo.upper
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|| env->me_dbgeo.now > size_bytes
|
||||
#endif /* Windows */
|
||||
)
|
||||
? 0 /* reassign readahead to the entire map
|
||||
because it (likely) was remapped */
|
||||
: env->me_dbgeo.now;
|
||||
rc = mdbx_is_readahead_reasonable(size_bytes, 0);
|
||||
if (rc == MDBX_RESULT_FALSE)
|
||||
rc = mdbx_set_readahead(env, 0, size_bytes, false);
|
||||
rc = mdbx_set_readahead(
|
||||
env, 0, (size_bytes > prev_size) ? size_bytes : prev_size, false);
|
||||
else if (rc == MDBX_RESULT_TRUE) {
|
||||
rc = MDBX_SUCCESS;
|
||||
if (size_bytes > readahead_offset) {
|
||||
const size_t readahead_pivot =
|
||||
(limit_bytes != prev_limit || env->me_dxb_mmap.address != prev_addr
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|| prev_size > size_bytes
|
||||
#endif /* Windows */
|
||||
)
|
||||
? 0 /* reassign readahead to the entire map
|
||||
because it was remapped */
|
||||
: prev_size;
|
||||
if (size_bytes > readahead_pivot) {
|
||||
*env->me_discarded_tail = size_pgno;
|
||||
rc = mdbx_set_readahead(env, readahead_offset,
|
||||
size_bytes - readahead_offset, true);
|
||||
rc = mdbx_set_readahead(env, readahead_pivot,
|
||||
size_bytes - readahead_pivot, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3575,35 +3575,33 @@ bailout:
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
mdbx_assert(env, size_bytes == env->me_dxb_mmap.current);
|
||||
mdbx_assert(env, size_bytes <= env->me_dxb_mmap.filesize);
|
||||
mdbx_assert(env, limit_bytes == env->me_dxb_mmap.length);
|
||||
#endif
|
||||
env->me_dbgeo.now = size_bytes;
|
||||
env->me_dbgeo.upper = limit_bytes;
|
||||
mdbx_assert(env, limit_bytes == env->me_dxb_mmap.limit);
|
||||
#endif /* Windows */
|
||||
if (env->me_txn) {
|
||||
mdbx_tassert(env->me_txn, size_pgno >= env->me_txn->mt_next_pgno);
|
||||
env->me_txn->mt_end_pgno = env->me_txn0->mt_end_pgno = size_pgno;
|
||||
}
|
||||
#ifdef MDBX_USE_VALGRIND
|
||||
if (prev_mapsize != env->me_mapsize || prev_mapaddr != env->me_map) {
|
||||
if (prev_limit != env->me_dxb_mmap.limit || prev_addr != env->me_map) {
|
||||
VALGRIND_DISCARD(env->me_valgrind_handle);
|
||||
env->me_valgrind_handle = 0;
|
||||
if (env->me_mapsize)
|
||||
if (env->me_dxb_mmap.limit)
|
||||
env->me_valgrind_handle =
|
||||
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
|
||||
VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
|
||||
}
|
||||
#endif
|
||||
#endif /* MDBX_USE_VALGRIND */
|
||||
} else {
|
||||
if (rc != MDBX_RESULT_TRUE) {
|
||||
mdbx_error("failed resize datafile/mapping: "
|
||||
"present %" PRIuPTR " -> %" PRIuPTR ", "
|
||||
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
|
||||
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper,
|
||||
env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
|
||||
limit_bytes, rc);
|
||||
} else {
|
||||
mdbx_notice("unable resize datafile/mapping: "
|
||||
"present %" PRIuPTR " -> %" PRIuPTR ", "
|
||||
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
|
||||
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper,
|
||||
env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
|
||||
limit_bytes, rc);
|
||||
}
|
||||
if (!env->me_dxb_mmap.address) {
|
||||
@ -4775,7 +4773,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
||||
rc = MDBX_PANIC;
|
||||
} else {
|
||||
const size_t size = pgno2bytes(env, txn->mt_end_pgno);
|
||||
if (unlikely(size > env->me_mapsize)) {
|
||||
if (unlikely(size > env->me_dxb_mmap.limit)) {
|
||||
if (txn->mt_geo.upper > MAX_PAGENO ||
|
||||
bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) !=
|
||||
txn->mt_geo.upper) {
|
||||
@ -4797,7 +4795,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
env->me_dbgeo.now = size;
|
||||
env->me_dxb_mmap.current = size;
|
||||
}
|
||||
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
|
||||
mdbx_txn_valgrind(env, txn);
|
||||
@ -7943,14 +7941,14 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
}
|
||||
|
||||
err = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
|
||||
env->me_dbgeo.upper);
|
||||
env->me_dbgeo.upper, lck_rc);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
|
||||
const size_t meta_length = pgno2bytes(env, NUM_METAS);
|
||||
(void)madvise(env->me_map, meta_length, MADV_DODUMP);
|
||||
(void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length,
|
||||
(void)madvise(env->me_map + meta_length, env->me_dxb_mmap.limit - meta_length,
|
||||
(mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP
|
||||
: MADV_DONTDUMP);
|
||||
#endif
|
||||
@ -7958,51 +7956,53 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
const size_t used_aligned2os_bytes =
|
||||
roundup_powerof2(used_bytes, env->me_os_psize);
|
||||
*env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
|
||||
if (used_aligned2os_bytes < env->me_dbgeo.now) {
|
||||
if (used_aligned2os_bytes < env->me_dxb_mmap.current) {
|
||||
#if defined(MADV_REMOVE)
|
||||
if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0) {
|
||||
mdbx_notice("open-MADV_%s %u..%u", "REMOVE", *env->me_discarded_tail,
|
||||
bytes2pgno(env, env->me_dbgeo.now));
|
||||
bytes2pgno(env, env->me_dxb_mmap.current));
|
||||
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||
env->me_dbgeo.now - used_aligned2os_bytes, MADV_REMOVE);
|
||||
env->me_dxb_mmap.current - used_aligned2os_bytes,
|
||||
MADV_REMOVE);
|
||||
}
|
||||
#endif /* MADV_REMOVE */
|
||||
#if defined(MADV_DONTNEED)
|
||||
mdbx_notice("open-MADV_%s %u..%u", "DONTNEED", *env->me_discarded_tail,
|
||||
bytes2pgno(env, env->me_dbgeo.now));
|
||||
bytes2pgno(env, env->me_dxb_mmap.current));
|
||||
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||
env->me_dbgeo.now - used_aligned2os_bytes, MADV_DONTNEED);
|
||||
env->me_dxb_mmap.current - used_aligned2os_bytes,
|
||||
MADV_DONTNEED);
|
||||
#elif defined(POSIX_MADV_DONTNEED)
|
||||
(void)madvise(env->me_map + used_aligned2os_bytes,
|
||||
env->me_dbgeo.now - used_aligned2os_bytes,
|
||||
env->me_dxb_mmap.current - used_aligned2os_bytes,
|
||||
POSIX_MADV_DONTNEED);
|
||||
#elif defined(POSIX_FADV_DONTNEED)
|
||||
(void)posix_fadvise(env->me_fd, used_aligned2os_bytes,
|
||||
env->me_dbgeo.now - used_aligned2os_bytes,
|
||||
env->me_dxb_mmap.current - used_aligned2os_bytes,
|
||||
POSIX_FADV_DONTNEED);
|
||||
#endif /* MADV_DONTNEED */
|
||||
}
|
||||
|
||||
#ifdef MDBX_USE_VALGRIND
|
||||
env->me_valgrind_handle =
|
||||
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
|
||||
VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
|
||||
#endif
|
||||
|
||||
const bool readahead =
|
||||
(env->me_flags & MDBX_NORDAHEAD) == 0 &&
|
||||
mdbx_is_readahead_reasonable(env->me_dbgeo.now, 0) == MDBX_RESULT_TRUE;
|
||||
err = mdbx_set_readahead(env, 0, env->me_dbgeo.now, readahead);
|
||||
const bool readahead = (env->me_flags & MDBX_NORDAHEAD) == 0 &&
|
||||
mdbx_is_readahead_reasonable(env->me_dxb_mmap.current,
|
||||
0) == MDBX_RESULT_TRUE;
|
||||
err = mdbx_set_readahead(env, 0, env->me_dxb_mmap.current, readahead);
|
||||
if (err != MDBX_SUCCESS)
|
||||
return err;
|
||||
|
||||
mdbx_assert(env, used_bytes >= pgno2bytes(env, NUM_METAS) &&
|
||||
used_bytes <= env->me_mapsize);
|
||||
used_bytes <= env->me_dxb_mmap.limit);
|
||||
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
|
||||
VALGRIND_MAKE_MEM_NOACCESS(env->me_map + used_bytes,
|
||||
env->me_mapsize - used_bytes);
|
||||
env->me_dxb_mmap.limit - used_bytes);
|
||||
ASAN_POISON_MEMORY_REGION(env->me_map + used_bytes,
|
||||
env->me_mapsize - used_bytes);
|
||||
env->me_poison_edge = bytes2pgno(env, env->me_mapsize);
|
||||
env->me_dxb_mmap.limit - used_bytes);
|
||||
env->me_poison_edge = bytes2pgno(env, env->me_dxb_mmap.limit);
|
||||
#endif /* MDBX_USE_VALGRIND */
|
||||
|
||||
/* NOTE: AddressSanitizer (at least GCC 7.x, 8.x) could generate
|
||||
@ -8097,29 +8097,19 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
|
||||
const MDBX_meta *head = mdbx_meta_head(env);
|
||||
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
|
||||
/* re-check file size after mmap */
|
||||
uint64_t filesize_after_mmap;
|
||||
err = mdbx_filesize(env->me_fd, &filesize_after_mmap);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
if (filesize_after_mmap != expected_bytes) {
|
||||
if (filesize_after_mmap != filesize_before_mmap)
|
||||
mdbx_verbose("datafile resized by system to %" PRIu64 " bytes",
|
||||
filesize_after_mmap);
|
||||
if (filesize_after_mmap % env->me_os_psize ||
|
||||
filesize_after_mmap > env->me_dbgeo.upper ||
|
||||
filesize_after_mmap < used_bytes) {
|
||||
mdbx_error("unacceptable/unexpected datafile size %" PRIu64,
|
||||
filesize_after_mmap);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if ((env->me_flags & MDBX_RDONLY) == 0) {
|
||||
meta.mm_geo.now =
|
||||
bytes2pgno(env, env->me_dbgeo.now = (size_t)filesize_after_mmap);
|
||||
mdbx_verbose("update meta-geo to filesize %" PRIuPTR
|
||||
" bytes, %" PRIaPGNO " pages",
|
||||
env->me_dbgeo.now, meta.mm_geo.now);
|
||||
}
|
||||
/* re-check size after mmap */
|
||||
if ((env->me_dxb_mmap.current & (env->me_os_psize - 1)) != 0 ||
|
||||
env->me_dxb_mmap.current < used_bytes) {
|
||||
mdbx_error("unacceptable/unexpected datafile size %" PRIuPTR,
|
||||
env->me_dxb_mmap.current);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if (env->me_dxb_mmap.current != expected_bytes &&
|
||||
(env->me_flags & MDBX_RDONLY) == 0) {
|
||||
meta.mm_geo.now = bytes2pgno(env, env->me_dxb_mmap.current);
|
||||
mdbx_verbose("update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO
|
||||
" pages",
|
||||
env->me_dxb_mmap.current, meta.mm_geo.now);
|
||||
}
|
||||
|
||||
if (memcmp(&meta.mm_geo, &head->mm_geo, sizeof(meta.mm_geo))) {
|
||||
@ -8248,30 +8238,23 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
goto bailout;
|
||||
|
||||
if (lck_seize_rc == MDBX_RESULT_TRUE) {
|
||||
uint64_t wanna = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) +
|
||||
sizeof(MDBX_lockinfo),
|
||||
env->me_os_psize);
|
||||
size = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) +
|
||||
sizeof(MDBX_lockinfo),
|
||||
env->me_os_psize);
|
||||
#ifndef NDEBUG
|
||||
err = mdbx_ftruncate(env->me_lfd, size = 0);
|
||||
err = mdbx_ftruncate(env->me_lfd, 0);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
#endif
|
||||
mdbx_jitter4testing(false);
|
||||
|
||||
if (size != wanna) {
|
||||
err = mdbx_ftruncate(env->me_lfd, wanna);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
size = wanna;
|
||||
}
|
||||
} else {
|
||||
if (env->me_flags & MDBX_EXCLUSIVE) {
|
||||
err = MDBX_BUSY;
|
||||
goto bailout;
|
||||
}
|
||||
if (size > PTRDIFF_MAX || (size & (env->me_os_psize - 1)) ||
|
||||
if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 ||
|
||||
size < env->me_os_psize) {
|
||||
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size);
|
||||
mdbx_error("lck-file has invalid size %" PRIu64 " bytes", size);
|
||||
err = MDBX_PROBLEM;
|
||||
goto bailout;
|
||||
}
|
||||
@ -8286,7 +8269,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
|
||||
}
|
||||
env->me_maxreaders = (unsigned)maxreaders;
|
||||
|
||||
err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size);
|
||||
err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size,
|
||||
lck_seize_rc);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
@ -14185,7 +14169,7 @@ int __cold mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
|
||||
unsynced_pages = *env->me_unsynced_pages +
|
||||
(*env->me_meta_sync_txnid != (uint32_t)arg->mi_last_pgno);
|
||||
|
||||
arg->mi_mapsize = env->me_mapsize;
|
||||
arg->mi_mapsize = env->me_dxb_mmap.limit;
|
||||
mdbx_compiler_barrier();
|
||||
if (likely(arg->mi_meta0_txnid == mdbx_meta_txnid_fluid(env, meta0) &&
|
||||
arg->mi_meta0_sign == meta0->mm_datasync_sign &&
|
||||
@ -16167,7 +16151,7 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
|
||||
return mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1) ? MDBX_RESULT_TRUE
|
||||
: MDBX_RESULT_FALSE;
|
||||
}
|
||||
if ((size_t)offset < env->me_mapsize) {
|
||||
if ((size_t)offset < env->me_dxb_mmap.limit) {
|
||||
/* Указатель адресует что-то в пределах mmap, но за границей
|
||||
* распределенных страниц. Такое может случится если mdbx_is_dirty()
|
||||
* вызывает после операции, в ходе которой гразная страница попала
|
||||
|
@ -940,7 +940,6 @@ struct MDBX_env {
|
||||
mdbx_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_fd me_dxb_mmap.fd
|
||||
#define me_mapsize me_dxb_mmap.length
|
||||
mdbx_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
#define me_lck me_lck_mmap.lck
|
||||
@ -1016,13 +1015,16 @@ struct MDBX_env {
|
||||
#endif
|
||||
MDBX_env *me_lcklist_next;
|
||||
|
||||
/* struct me_dbgeo used for accepting db-geo params from user for the new
|
||||
* database creation, i.e. when mdbx_env_set_geometry() was called before
|
||||
* mdbx_env_open(). */
|
||||
struct {
|
||||
size_t lower; /* minimal size of datafile */
|
||||
size_t upper; /* maximal size of datafile */
|
||||
size_t now; /* current size of datafile */
|
||||
size_t grow; /* step to grow datafile */
|
||||
size_t shrink; /* threshold to shrink datafile */
|
||||
} me_dbgeo; /* */
|
||||
} me_dbgeo;
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
MDBX_srwlock me_remap_guard;
|
||||
|
@ -1020,33 +1020,47 @@ MDBX_INTERNAL_FUNC int mdbx_check4nonlocal(mdbx_filehandle_t handle,
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
|
||||
size_t limit) {
|
||||
MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
|
||||
const size_t size, const size_t limit,
|
||||
const bool truncate) {
|
||||
assert(size <= limit);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
map->length = 0;
|
||||
map->limit = 0;
|
||||
map->current = 0;
|
||||
map->section = NULL;
|
||||
map->address = nullptr;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
map->section = NULL;
|
||||
map->filesize = 0;
|
||||
#endif /* Windows */
|
||||
|
||||
NTSTATUS rc = mdbx_check4nonlocal(map->fd, flags);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
return rc;
|
||||
int err = mdbx_check4nonlocal(map->fd, flags);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
rc = mdbx_filesize(map->fd, &map->filesize);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
return rc;
|
||||
if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) {
|
||||
rc = mdbx_ftruncate(map->fd, size);
|
||||
if (rc == MDBX_SUCCESS)
|
||||
map->filesize = size;
|
||||
/* ignore error, because Windows unable shrink file
|
||||
* that already mapped (by another process) */
|
||||
if ((flags & MDBX_RDONLY) == 0 && truncate) {
|
||||
err = mdbx_ftruncate(map->fd, size);
|
||||
if (err != MDBX_SUCCESS)
|
||||
return err;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
map->filesize = size;
|
||||
#else
|
||||
map->current = size;
|
||||
#endif
|
||||
} else {
|
||||
uint64_t filesize;
|
||||
err = mdbx_filesize(map->fd, &filesize);
|
||||
if (err != MDBX_SUCCESS)
|
||||
return err;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
map->filesize = filesize;
|
||||
#else
|
||||
map->current = (filesize > limit) ? limit : (size_t)filesize;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
LARGE_INTEGER SectionSize;
|
||||
SectionSize.QuadPart = size;
|
||||
rc = NtCreateSection(
|
||||
err = NtCreateSection(
|
||||
&map->section,
|
||||
/* DesiredAccess */
|
||||
(flags & MDBX_WRITEMAP)
|
||||
@ -1057,11 +1071,11 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
|
||||
/* SectionPageProtection */
|
||||
(flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE,
|
||||
/* AllocationAttributes */ SEC_RESERVE, map->fd);
|
||||
if (!NT_SUCCESS(rc))
|
||||
return ntstatus2errcode(rc);
|
||||
if (!NT_SUCCESS(err))
|
||||
return ntstatus2errcode(err);
|
||||
|
||||
SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 : limit;
|
||||
rc = NtMapViewOfSection(
|
||||
err = NtMapViewOfSection(
|
||||
map->section, GetCurrentProcess(), &map->address,
|
||||
/* ZeroBits */ 0,
|
||||
/* CommitSize */ 0,
|
||||
@ -1070,44 +1084,42 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
|
||||
/* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE,
|
||||
/* Win32Protect */
|
||||
(flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY);
|
||||
if (!NT_SUCCESS(rc)) {
|
||||
if (!NT_SUCCESS(err)) {
|
||||
NtClose(map->section);
|
||||
map->section = 0;
|
||||
map->address = nullptr;
|
||||
return ntstatus2errcode(rc);
|
||||
return ntstatus2errcode(err);
|
||||
}
|
||||
assert(map->address != MAP_FAILED);
|
||||
|
||||
map->current = (size_t)SectionSize.QuadPart;
|
||||
map->length = ViewSize;
|
||||
return MDBX_SUCCESS;
|
||||
map->limit = ViewSize;
|
||||
|
||||
#else
|
||||
int err = mdbx_check4nonlocal(map->fd, flags);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
(void)size;
|
||||
|
||||
map->address = mmap(
|
||||
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
||||
MAP_SHARED, map->fd, 0);
|
||||
|
||||
if (unlikely(map->address == MAP_FAILED)) {
|
||||
map->length = 0;
|
||||
map->limit = 0;
|
||||
map->current = 0;
|
||||
map->address = nullptr;
|
||||
return errno;
|
||||
}
|
||||
map->length = limit;
|
||||
map->limit = limit;
|
||||
|
||||
#ifdef MADV_DONTFORK
|
||||
if (unlikely(madvise(map->address, map->length, MADV_DONTFORK) != 0))
|
||||
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
|
||||
return errno;
|
||||
#endif
|
||||
|
||||
#ifdef MADV_NOHUGEPAGE
|
||||
(void)madvise(map->address, map->length, MADV_NOHUGEPAGE);
|
||||
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
#endif
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
|
||||
@ -1117,16 +1129,14 @@ MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
|
||||
NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
|
||||
if (!NT_SUCCESS(rc))
|
||||
ntstatus2errcode(rc);
|
||||
#else
|
||||
if (unlikely(munmap(map->address, map->limit)))
|
||||
return errno;
|
||||
#endif
|
||||
|
||||
map->length = 0;
|
||||
map->limit = 0;
|
||||
map->current = 0;
|
||||
map->address = nullptr;
|
||||
#else
|
||||
if (unlikely(munmap(map->address, map->length)))
|
||||
return errno;
|
||||
map->length = 0;
|
||||
map->address = nullptr;
|
||||
#endif
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1134,13 +1144,13 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
||||
size_t limit) {
|
||||
assert(size <= limit);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
assert(size != map->current || limit != map->length || size < map->filesize);
|
||||
assert(size != map->current || limit != map->limit || size < map->filesize);
|
||||
|
||||
NTSTATUS status;
|
||||
LARGE_INTEGER SectionSize;
|
||||
int err, rc = MDBX_SUCCESS;
|
||||
|
||||
if (!(flags & MDBX_RDONLY) && limit == map->length && size > map->current) {
|
||||
if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current) {
|
||||
/* growth rw-section */
|
||||
SectionSize.QuadPart = size;
|
||||
status = NtExtendSection(map->section, &SectionSize);
|
||||
@ -1152,10 +1162,10 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
||||
return ntstatus2errcode(status);
|
||||
}
|
||||
|
||||
if (limit > map->length) {
|
||||
if (limit > map->limit) {
|
||||
/* check ability of address space for growth before umnap */
|
||||
PVOID BaseAddress = (PBYTE)map->address + map->length;
|
||||
SIZE_T RegionSize = limit - map->length;
|
||||
PVOID BaseAddress = (PBYTE)map->address + map->limit;
|
||||
SIZE_T RegionSize = limit - map->limit;
|
||||
status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0,
|
||||
&RegionSize, MEM_RESERVE, PAGE_NOACCESS);
|
||||
if (!NT_SUCCESS(status))
|
||||
@ -1185,7 +1195,7 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
||||
err = ntstatus2errcode(status);
|
||||
bailout:
|
||||
map->address = NULL;
|
||||
map->current = map->length = 0;
|
||||
map->current = map->limit = 0;
|
||||
if (ReservedAddress)
|
||||
(void)NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress,
|
||||
&ReservedSize, MEM_RELEASE);
|
||||
@ -1268,12 +1278,12 @@ retry_mapview:;
|
||||
NtClose(map->section);
|
||||
map->section = NULL;
|
||||
|
||||
if (map->address && (size != map->current || limit != map->length)) {
|
||||
if (map->address && (size != map->current || limit != map->limit)) {
|
||||
/* try remap with previously size and limit,
|
||||
* but will return MDBX_RESULT_TRUE on success */
|
||||
rc = MDBX_RESULT_TRUE;
|
||||
size = map->current;
|
||||
limit = map->length;
|
||||
limit = map->limit;
|
||||
goto retry_file_and_section;
|
||||
}
|
||||
|
||||
@ -1283,28 +1293,54 @@ retry_mapview:;
|
||||
assert(map->address != MAP_FAILED);
|
||||
|
||||
map->current = (size_t)SectionSize.QuadPart;
|
||||
map->length = ViewSize;
|
||||
return rc;
|
||||
map->limit = ViewSize;
|
||||
#else
|
||||
if (limit != map->length) {
|
||||
|
||||
uint64_t filesize;
|
||||
int rc = mdbx_filesize(map->fd, &filesize);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
return rc;
|
||||
|
||||
if (flags & MDBX_RDONLY) {
|
||||
map->current = (filesize > limit) ? limit : (size_t)filesize;
|
||||
if (map->current != size)
|
||||
rc = MDBX_RESULT_TRUE;
|
||||
} else if (filesize != size) {
|
||||
rc = mdbx_ftruncate(map->fd, size);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
return rc;
|
||||
map->current = size;
|
||||
}
|
||||
|
||||
if (limit != map->limit) {
|
||||
#if defined(_GNU_SOURCE) && (defined(__linux__) || defined(__gnu_linux__))
|
||||
void *ptr = mremap(map->address, map->length, limit,
|
||||
void *ptr = mremap(map->address, map->limit, limit,
|
||||
/* LY: in case changing the mapping size calling code
|
||||
must guarantees the absence of competing threads, and
|
||||
a willingness to another base address */
|
||||
must guarantees the absence of competing threads,
|
||||
and a willingness to another base address */
|
||||
MREMAP_MAYMOVE);
|
||||
if (ptr == MAP_FAILED) {
|
||||
int err = errno;
|
||||
return (err == EAGAIN || err == ENOMEM) ? MDBX_RESULT_TRUE : err;
|
||||
rc = errno;
|
||||
return (rc == EAGAIN || rc == ENOMEM) ? MDBX_RESULT_TRUE : rc;
|
||||
}
|
||||
map->address = ptr;
|
||||
map->length = limit;
|
||||
map->limit = limit;
|
||||
|
||||
#ifdef MADV_DONTFORK
|
||||
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
|
||||
return errno;
|
||||
#endif
|
||||
|
||||
#ifdef MADV_NOHUGEPAGE
|
||||
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
|
||||
#endif
|
||||
|
||||
#else
|
||||
return MDBX_RESULT_TRUE;
|
||||
rc = MDBX_RESULT_TRUE;
|
||||
#endif /* _GNU_SOURCE && __linux__ */
|
||||
}
|
||||
return (flags & MDBX_RDONLY) ? MDBX_SUCCESS : mdbx_ftruncate(map->fd, size);
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
@ -622,18 +622,19 @@ typedef struct mdbx_mmap_param {
|
||||
struct MDBX_lockinfo *lck;
|
||||
};
|
||||
mdbx_filehandle_t fd;
|
||||
size_t length; /* mapping length, but NOT a size of file or DB */
|
||||
size_t limit; /* mapping length, but NOT a size of file nor DB */
|
||||
size_t current; /* mapped region size, i.e. the size of file and DB */
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
size_t current; /* mapped region size, e.g. file and DB */
|
||||
uint64_t filesize;
|
||||
uint64_t filesize /* in-process cache of a file size. */;
|
||||
#endif
|
||||
#ifdef MDBX_OSAL_SECTION
|
||||
MDBX_OSAL_SECTION section;
|
||||
#endif
|
||||
} mdbx_mmap_t;
|
||||
|
||||
MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t must,
|
||||
size_t limit);
|
||||
MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
|
||||
const size_t must, const size_t limit,
|
||||
const bool truncate);
|
||||
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
|
||||
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
|
||||
size_t wanna);
|
||||
|
Loading…
x
Reference in New Issue
Block a user