mdbx: refactor me_dbgeo usage and osal-mmap/mresize().

Change-Id: I1f29c953abcbd4f2bab7ba52e7dd9da85ea48354
This commit is contained in:
Leonid Yuriev 2019-10-28 18:01:02 +03:00
parent b4729bd1d6
commit 415cb5f886
4 changed files with 190 additions and 167 deletions

View File

@ -3411,7 +3411,6 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset, static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
const size_t length, const bool enable) { const size_t length, const bool enable) {
assert(length > 0); assert(length > 0);
mdbx_notice("readahead %s %u..%u", enable ? "ON" : "OFF", mdbx_notice("readahead %s %u..%u", enable ? "ON" : "OFF",
bytes2pgno(env, offset), bytes2pgno(env, offset + length)); bytes2pgno(env, offset), bytes2pgno(env, offset + length));
@ -3466,18 +3465,14 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno, static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
const pgno_t limit_pgno) { const pgno_t limit_pgno) {
#ifdef MDBX_USE_VALGRIND
const size_t prev_mapsize = env->me_mapsize;
void *const prev_mapaddr = env->me_map;
#endif
const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
mdbx_verbose("resize datafile/mapping: " mdbx_verbose("resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", " "present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR, "limit %" PRIuPTR " -> %" PRIuPTR,
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper, limit_bytes); env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
limit_bytes);
mdbx_assert(env, limit_bytes >= size_bytes); mdbx_assert(env, limit_bytes >= size_bytes);
mdbx_assert(env, bytes2pgno(env, size_bytes) >= size_pgno); mdbx_assert(env, bytes2pgno(env, size_bytes) >= size_pgno);
@ -3491,9 +3486,9 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
mdbx_handle_array_t *suspended = NULL; mdbx_handle_array_t *suspended = NULL;
mdbx_handle_array_t array_onstack; mdbx_handle_array_t array_onstack;
int rc = MDBX_SUCCESS; int rc = MDBX_SUCCESS;
if (limit_bytes == env->me_dxb_mmap.length && if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current && size_bytes == env->me_dxb_mmap.current &&
env->me_dxb_mmap.current == env->me_dxb_mmap.filesize) size_bytes == env->me_dxb_mmap.filesize)
goto bailout; goto bailout;
/* 1) Windows allows only extending a read-write section, but not a /* 1) Windows allows only extending a read-write section, but not a
@ -3517,27 +3512,31 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard); int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
if (rc != MDBX_SUCCESS) if (rc != MDBX_SUCCESS)
return rc; return rc;
if (limit_bytes == env->me_dxb_mmap.length && size_bytes == env->me_dbgeo.now) if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current)
goto bailout; goto bailout;
#endif /* Windows */ #endif /* Windows */
if (size_bytes < env->me_dbgeo.now) { const size_t prev_limit = env->me_dxb_mmap.limit;
const void *const prev_addr = env->me_map;
const size_t prev_size = env->me_dxb_mmap.current;
if (size_bytes < prev_size) {
mdbx_notice("resize-MADV_%s %u..%u", mdbx_notice("resize-MADV_%s %u..%u",
(env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", (env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED",
size_pgno, bytes2pgno(env, env->me_dbgeo.now)); size_pgno, bytes2pgno(env, prev_size));
#if defined(MADV_REMOVE) #if defined(MADV_REMOVE)
if ((env->me_flags & MDBX_WRITEMAP) == 0 || if ((env->me_flags & MDBX_WRITEMAP) == 0 ||
madvise(env->me_map + size_bytes, env->me_dbgeo.now - size_bytes, madvise(env->me_map + size_bytes, prev_size - size_bytes,
MADV_REMOVE) != 0) MADV_REMOVE) != 0)
#endif #endif
#if defined(MADV_DONTNEED) #if defined(MADV_DONTNEED)
(void)madvise(env->me_map + size_bytes, env->me_dbgeo.now - size_bytes, (void)madvise(env->me_map + size_bytes, prev_size - size_bytes,
MADV_DONTNEED); MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED) #elif defined(POSIX_MADV_DONTNEED)
(void)posix_madvise(env->me_map + size_bytes, (void)posix_madvise(env->me_map + size_bytes, prev_size - size_bytes,
env->me_dbgeo.now - size_bytes, POSIX_MADV_DONTNEED); POSIX_MADV_DONTNEED);
#elif defined(POSIX_FADV_DONTNEED) #elif defined(POSIX_FADV_DONTNEED)
(void)posix_fadvise(env->me_fd, size_bytes, env->me_dbgeo.now - size_bytes, (void)posix_fadvise(env->me_fd, size_bytes, prev_size - size_bytes,
POSIX_FADV_DONTNEED); POSIX_FADV_DONTNEED);
#else #else
__noop(); __noop();
@ -3548,24 +3547,25 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes); rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) { if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
const size_t readahead_offset =
(limit_bytes != env->me_dbgeo.upper
#if defined(_WIN32) || defined(_WIN64)
|| env->me_dbgeo.now > size_bytes
#endif /* Windows */
)
? 0 /* reassign readahead to the entire map
because it (likely) was remapped */
: env->me_dbgeo.now;
rc = mdbx_is_readahead_reasonable(size_bytes, 0); rc = mdbx_is_readahead_reasonable(size_bytes, 0);
if (rc == MDBX_RESULT_FALSE) if (rc == MDBX_RESULT_FALSE)
rc = mdbx_set_readahead(env, 0, size_bytes, false); rc = mdbx_set_readahead(
env, 0, (size_bytes > prev_size) ? size_bytes : prev_size, false);
else if (rc == MDBX_RESULT_TRUE) { else if (rc == MDBX_RESULT_TRUE) {
rc = MDBX_SUCCESS; rc = MDBX_SUCCESS;
if (size_bytes > readahead_offset) { const size_t readahead_pivot =
(limit_bytes != prev_limit || env->me_dxb_mmap.address != prev_addr
#if defined(_WIN32) || defined(_WIN64)
|| prev_size > size_bytes
#endif /* Windows */
)
? 0 /* reassign readahead to the entire map
because it was remapped */
: prev_size;
if (size_bytes > readahead_pivot) {
*env->me_discarded_tail = size_pgno; *env->me_discarded_tail = size_pgno;
rc = mdbx_set_readahead(env, readahead_offset, rc = mdbx_set_readahead(env, readahead_pivot,
size_bytes - readahead_offset, true); size_bytes - readahead_pivot, true);
} }
} }
} }
@ -3575,35 +3575,33 @@ bailout:
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
mdbx_assert(env, size_bytes == env->me_dxb_mmap.current); mdbx_assert(env, size_bytes == env->me_dxb_mmap.current);
mdbx_assert(env, size_bytes <= env->me_dxb_mmap.filesize); mdbx_assert(env, size_bytes <= env->me_dxb_mmap.filesize);
mdbx_assert(env, limit_bytes == env->me_dxb_mmap.length); mdbx_assert(env, limit_bytes == env->me_dxb_mmap.limit);
#endif #endif /* Windows */
env->me_dbgeo.now = size_bytes;
env->me_dbgeo.upper = limit_bytes;
if (env->me_txn) { if (env->me_txn) {
mdbx_tassert(env->me_txn, size_pgno >= env->me_txn->mt_next_pgno); mdbx_tassert(env->me_txn, size_pgno >= env->me_txn->mt_next_pgno);
env->me_txn->mt_end_pgno = env->me_txn0->mt_end_pgno = size_pgno; env->me_txn->mt_end_pgno = env->me_txn0->mt_end_pgno = size_pgno;
} }
#ifdef MDBX_USE_VALGRIND #ifdef MDBX_USE_VALGRIND
if (prev_mapsize != env->me_mapsize || prev_mapaddr != env->me_map) { if (prev_limit != env->me_dxb_mmap.limit || prev_addr != env->me_map) {
VALGRIND_DISCARD(env->me_valgrind_handle); VALGRIND_DISCARD(env->me_valgrind_handle);
env->me_valgrind_handle = 0; env->me_valgrind_handle = 0;
if (env->me_mapsize) if (env->me_dxb_mmap.limit)
env->me_valgrind_handle = env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx"); VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
} }
#endif #endif /* MDBX_USE_VALGRIND */
} else { } else {
if (rc != MDBX_RESULT_TRUE) { if (rc != MDBX_RESULT_TRUE) {
mdbx_error("failed resize datafile/mapping: " mdbx_error("failed resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", " "present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper, env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
limit_bytes, rc); limit_bytes, rc);
} else { } else {
mdbx_notice("unable resize datafile/mapping: " mdbx_notice("unable resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", " "present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper, env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
limit_bytes, rc); limit_bytes, rc);
} }
if (!env->me_dxb_mmap.address) { if (!env->me_dxb_mmap.address) {
@ -4775,7 +4773,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
rc = MDBX_PANIC; rc = MDBX_PANIC;
} else { } else {
const size_t size = pgno2bytes(env, txn->mt_end_pgno); const size_t size = pgno2bytes(env, txn->mt_end_pgno);
if (unlikely(size > env->me_mapsize)) { if (unlikely(size > env->me_dxb_mmap.limit)) {
if (txn->mt_geo.upper > MAX_PAGENO || if (txn->mt_geo.upper > MAX_PAGENO ||
bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) != bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) !=
txn->mt_geo.upper) { txn->mt_geo.upper) {
@ -4797,7 +4795,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
} }
#endif #endif
} else { } else {
env->me_dbgeo.now = size; env->me_dxb_mmap.current = size;
} }
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
mdbx_txn_valgrind(env, txn); mdbx_txn_valgrind(env, txn);
@ -7943,14 +7941,14 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
} }
err = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, err = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
env->me_dbgeo.upper); env->me_dbgeo.upper, lck_rc);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP) #if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
const size_t meta_length = pgno2bytes(env, NUM_METAS); const size_t meta_length = pgno2bytes(env, NUM_METAS);
(void)madvise(env->me_map, meta_length, MADV_DODUMP); (void)madvise(env->me_map, meta_length, MADV_DODUMP);
(void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length, (void)madvise(env->me_map + meta_length, env->me_dxb_mmap.limit - meta_length,
(mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP (mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP
: MADV_DONTDUMP); : MADV_DONTDUMP);
#endif #endif
@ -7958,51 +7956,53 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
const size_t used_aligned2os_bytes = const size_t used_aligned2os_bytes =
roundup_powerof2(used_bytes, env->me_os_psize); roundup_powerof2(used_bytes, env->me_os_psize);
*env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes); *env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
if (used_aligned2os_bytes < env->me_dbgeo.now) { if (used_aligned2os_bytes < env->me_dxb_mmap.current) {
#if defined(MADV_REMOVE) #if defined(MADV_REMOVE)
if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0) { if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0) {
mdbx_notice("open-MADV_%s %u..%u", "REMOVE", *env->me_discarded_tail, mdbx_notice("open-MADV_%s %u..%u", "REMOVE", *env->me_discarded_tail,
bytes2pgno(env, env->me_dbgeo.now)); bytes2pgno(env, env->me_dxb_mmap.current));
(void)madvise(env->me_map + used_aligned2os_bytes, (void)madvise(env->me_map + used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes, MADV_REMOVE); env->me_dxb_mmap.current - used_aligned2os_bytes,
MADV_REMOVE);
} }
#endif /* MADV_REMOVE */ #endif /* MADV_REMOVE */
#if defined(MADV_DONTNEED) #if defined(MADV_DONTNEED)
mdbx_notice("open-MADV_%s %u..%u", "DONTNEED", *env->me_discarded_tail, mdbx_notice("open-MADV_%s %u..%u", "DONTNEED", *env->me_discarded_tail,
bytes2pgno(env, env->me_dbgeo.now)); bytes2pgno(env, env->me_dxb_mmap.current));
(void)madvise(env->me_map + used_aligned2os_bytes, (void)madvise(env->me_map + used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes, MADV_DONTNEED); env->me_dxb_mmap.current - used_aligned2os_bytes,
MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED) #elif defined(POSIX_MADV_DONTNEED)
(void)madvise(env->me_map + used_aligned2os_bytes, (void)madvise(env->me_map + used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes, env->me_dxb_mmap.current - used_aligned2os_bytes,
POSIX_MADV_DONTNEED); POSIX_MADV_DONTNEED);
#elif defined(POSIX_FADV_DONTNEED) #elif defined(POSIX_FADV_DONTNEED)
(void)posix_fadvise(env->me_fd, used_aligned2os_bytes, (void)posix_fadvise(env->me_fd, used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes, env->me_dxb_mmap.current - used_aligned2os_bytes,
POSIX_FADV_DONTNEED); POSIX_FADV_DONTNEED);
#endif /* MADV_DONTNEED */ #endif /* MADV_DONTNEED */
} }
#ifdef MDBX_USE_VALGRIND #ifdef MDBX_USE_VALGRIND
env->me_valgrind_handle = env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx"); VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
#endif #endif
const bool readahead = const bool readahead = (env->me_flags & MDBX_NORDAHEAD) == 0 &&
(env->me_flags & MDBX_NORDAHEAD) == 0 && mdbx_is_readahead_reasonable(env->me_dxb_mmap.current,
mdbx_is_readahead_reasonable(env->me_dbgeo.now, 0) == MDBX_RESULT_TRUE; 0) == MDBX_RESULT_TRUE;
err = mdbx_set_readahead(env, 0, env->me_dbgeo.now, readahead); err = mdbx_set_readahead(env, 0, env->me_dxb_mmap.current, readahead);
if (err != MDBX_SUCCESS) if (err != MDBX_SUCCESS)
return err; return err;
mdbx_assert(env, used_bytes >= pgno2bytes(env, NUM_METAS) && mdbx_assert(env, used_bytes >= pgno2bytes(env, NUM_METAS) &&
used_bytes <= env->me_mapsize); used_bytes <= env->me_dxb_mmap.limit);
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
VALGRIND_MAKE_MEM_NOACCESS(env->me_map + used_bytes, VALGRIND_MAKE_MEM_NOACCESS(env->me_map + used_bytes,
env->me_mapsize - used_bytes); env->me_dxb_mmap.limit - used_bytes);
ASAN_POISON_MEMORY_REGION(env->me_map + used_bytes, ASAN_POISON_MEMORY_REGION(env->me_map + used_bytes,
env->me_mapsize - used_bytes); env->me_dxb_mmap.limit - used_bytes);
env->me_poison_edge = bytes2pgno(env, env->me_mapsize); env->me_poison_edge = bytes2pgno(env, env->me_dxb_mmap.limit);
#endif /* MDBX_USE_VALGRIND */ #endif /* MDBX_USE_VALGRIND */
/* NOTE: AddressSanitizer (at least GCC 7.x, 8.x) could generate /* NOTE: AddressSanitizer (at least GCC 7.x, 8.x) could generate
@ -8097,29 +8097,19 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
const MDBX_meta *head = mdbx_meta_head(env); const MDBX_meta *head = mdbx_meta_head(env);
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) { if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
/* re-check file size after mmap */ /* re-check size after mmap */
uint64_t filesize_after_mmap; if ((env->me_dxb_mmap.current & (env->me_os_psize - 1)) != 0 ||
err = mdbx_filesize(env->me_fd, &filesize_after_mmap); env->me_dxb_mmap.current < used_bytes) {
if (unlikely(err != MDBX_SUCCESS)) mdbx_error("unacceptable/unexpected datafile size %" PRIuPTR,
return err; env->me_dxb_mmap.current);
if (filesize_after_mmap != expected_bytes) { return MDBX_PROBLEM;
if (filesize_after_mmap != filesize_before_mmap) }
mdbx_verbose("datafile resized by system to %" PRIu64 " bytes", if (env->me_dxb_mmap.current != expected_bytes &&
filesize_after_mmap); (env->me_flags & MDBX_RDONLY) == 0) {
if (filesize_after_mmap % env->me_os_psize || meta.mm_geo.now = bytes2pgno(env, env->me_dxb_mmap.current);
filesize_after_mmap > env->me_dbgeo.upper || mdbx_verbose("update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO
filesize_after_mmap < used_bytes) { " pages",
mdbx_error("unacceptable/unexpected datafile size %" PRIu64, env->me_dxb_mmap.current, meta.mm_geo.now);
filesize_after_mmap);
return MDBX_PROBLEM;
}
if ((env->me_flags & MDBX_RDONLY) == 0) {
meta.mm_geo.now =
bytes2pgno(env, env->me_dbgeo.now = (size_t)filesize_after_mmap);
mdbx_verbose("update meta-geo to filesize %" PRIuPTR
" bytes, %" PRIaPGNO " pages",
env->me_dbgeo.now, meta.mm_geo.now);
}
} }
if (memcmp(&meta.mm_geo, &head->mm_geo, sizeof(meta.mm_geo))) { if (memcmp(&meta.mm_geo, &head->mm_geo, sizeof(meta.mm_geo))) {
@ -8248,30 +8238,23 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
goto bailout; goto bailout;
if (lck_seize_rc == MDBX_RESULT_TRUE) { if (lck_seize_rc == MDBX_RESULT_TRUE) {
uint64_t wanna = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + size = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) +
sizeof(MDBX_lockinfo), sizeof(MDBX_lockinfo),
env->me_os_psize); env->me_os_psize);
#ifndef NDEBUG #ifndef NDEBUG
err = mdbx_ftruncate(env->me_lfd, size = 0); err = mdbx_ftruncate(env->me_lfd, 0);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
goto bailout; goto bailout;
#endif #endif
mdbx_jitter4testing(false); mdbx_jitter4testing(false);
if (size != wanna) {
err = mdbx_ftruncate(env->me_lfd, wanna);
if (unlikely(err != MDBX_SUCCESS))
goto bailout;
size = wanna;
}
} else { } else {
if (env->me_flags & MDBX_EXCLUSIVE) { if (env->me_flags & MDBX_EXCLUSIVE) {
err = MDBX_BUSY; err = MDBX_BUSY;
goto bailout; goto bailout;
} }
if (size > PTRDIFF_MAX || (size & (env->me_os_psize - 1)) || if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 ||
size < env->me_os_psize) { size < env->me_os_psize) {
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size); mdbx_error("lck-file has invalid size %" PRIu64 " bytes", size);
err = MDBX_PROBLEM; err = MDBX_PROBLEM;
goto bailout; goto bailout;
} }
@ -8286,7 +8269,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
} }
env->me_maxreaders = (unsigned)maxreaders; env->me_maxreaders = (unsigned)maxreaders;
err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size); err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size,
lck_seize_rc);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
goto bailout; goto bailout;
@ -14185,7 +14169,7 @@ int __cold mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
unsynced_pages = *env->me_unsynced_pages + unsynced_pages = *env->me_unsynced_pages +
(*env->me_meta_sync_txnid != (uint32_t)arg->mi_last_pgno); (*env->me_meta_sync_txnid != (uint32_t)arg->mi_last_pgno);
arg->mi_mapsize = env->me_mapsize; arg->mi_mapsize = env->me_dxb_mmap.limit;
mdbx_compiler_barrier(); mdbx_compiler_barrier();
if (likely(arg->mi_meta0_txnid == mdbx_meta_txnid_fluid(env, meta0) && if (likely(arg->mi_meta0_txnid == mdbx_meta_txnid_fluid(env, meta0) &&
arg->mi_meta0_sign == meta0->mm_datasync_sign && arg->mi_meta0_sign == meta0->mm_datasync_sign &&
@ -16167,7 +16151,7 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
return mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1) ? MDBX_RESULT_TRUE return mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1) ? MDBX_RESULT_TRUE
: MDBX_RESULT_FALSE; : MDBX_RESULT_FALSE;
} }
if ((size_t)offset < env->me_mapsize) { if ((size_t)offset < env->me_dxb_mmap.limit) {
/* Указатель адресует что-то в пределах mmap, но за границей /* Указатель адресует что-то в пределах mmap, но за границей
* распределенных страниц. Такое может случиться, если mdbx_is_dirty() * распределенных страниц. Такое может случиться, если mdbx_is_dirty()
* вызывается после операции, в ходе которой грязная страница попала * вызывается после операции, в ходе которой грязная страница попала

View File

@ -940,7 +940,6 @@ struct MDBX_env {
mdbx_mmap_t me_dxb_mmap; /* The main data file */ mdbx_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb #define me_map me_dxb_mmap.dxb
#define me_fd me_dxb_mmap.fd #define me_fd me_dxb_mmap.fd
#define me_mapsize me_dxb_mmap.length
mdbx_mmap_t me_lck_mmap; /* The lock file */ mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd #define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck #define me_lck me_lck_mmap.lck
@ -1016,13 +1015,16 @@ struct MDBX_env {
#endif #endif
MDBX_env *me_lcklist_next; MDBX_env *me_lcklist_next;
/* struct me_dbgeo used for accepting db-geo params from user for the new
* database creation, i.e. when mdbx_env_set_geometry() was called before
* mdbx_env_open(). */
struct { struct {
size_t lower; /* minimal size of datafile */ size_t lower; /* minimal size of datafile */
size_t upper; /* maximal size of datafile */ size_t upper; /* maximal size of datafile */
size_t now; /* current size of datafile */ size_t now; /* current size of datafile */
size_t grow; /* step to grow datafile */ size_t grow; /* step to grow datafile */
size_t shrink; /* threshold to shrink datafile */ size_t shrink; /* threshold to shrink datafile */
} me_dbgeo; /* */ } me_dbgeo;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
MDBX_srwlock me_remap_guard; MDBX_srwlock me_remap_guard;

View File

@ -1020,33 +1020,47 @@ MDBX_INTERNAL_FUNC int mdbx_check4nonlocal(mdbx_filehandle_t handle,
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
size_t limit) { const size_t size, const size_t limit,
const bool truncate) {
assert(size <= limit); assert(size <= limit);
#if defined(_WIN32) || defined(_WIN64) map->limit = 0;
map->length = 0;
map->current = 0; map->current = 0;
map->section = NULL;
map->address = nullptr; map->address = nullptr;
#if defined(_WIN32) || defined(_WIN64)
map->section = NULL;
map->filesize = 0;
#endif /* Windows */
NTSTATUS rc = mdbx_check4nonlocal(map->fd, flags); int err = mdbx_check4nonlocal(map->fd, flags);
if (rc != MDBX_SUCCESS) if (unlikely(err != MDBX_SUCCESS))
return rc; return err;
rc = mdbx_filesize(map->fd, &map->filesize); if ((flags & MDBX_RDONLY) == 0 && truncate) {
if (rc != MDBX_SUCCESS) err = mdbx_ftruncate(map->fd, size);
return rc; if (err != MDBX_SUCCESS)
if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) { return err;
rc = mdbx_ftruncate(map->fd, size); #if defined(_WIN32) || defined(_WIN64)
if (rc == MDBX_SUCCESS) map->filesize = size;
map->filesize = size; #else
/* ignore error, because Windows unable shrink file map->current = size;
* that already mapped (by another process) */ #endif
} else {
uint64_t filesize;
err = mdbx_filesize(map->fd, &filesize);
if (err != MDBX_SUCCESS)
return err;
#if defined(_WIN32) || defined(_WIN64)
map->filesize = filesize;
#else
map->current = (filesize > limit) ? limit : (size_t)filesize;
#endif
} }
#if defined(_WIN32) || defined(_WIN64)
LARGE_INTEGER SectionSize; LARGE_INTEGER SectionSize;
SectionSize.QuadPart = size; SectionSize.QuadPart = size;
rc = NtCreateSection( err = NtCreateSection(
&map->section, &map->section,
/* DesiredAccess */ /* DesiredAccess */
(flags & MDBX_WRITEMAP) (flags & MDBX_WRITEMAP)
@ -1057,11 +1071,11 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
/* SectionPageProtection */ /* SectionPageProtection */
(flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE,
/* AllocationAttributes */ SEC_RESERVE, map->fd); /* AllocationAttributes */ SEC_RESERVE, map->fd);
if (!NT_SUCCESS(rc)) if (!NT_SUCCESS(err))
return ntstatus2errcode(rc); return ntstatus2errcode(err);
SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 : limit; SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 : limit;
rc = NtMapViewOfSection( err = NtMapViewOfSection(
map->section, GetCurrentProcess(), &map->address, map->section, GetCurrentProcess(), &map->address,
/* ZeroBits */ 0, /* ZeroBits */ 0,
/* CommitSize */ 0, /* CommitSize */ 0,
@ -1070,44 +1084,42 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
/* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE,
/* Win32Protect */ /* Win32Protect */
(flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY);
if (!NT_SUCCESS(rc)) { if (!NT_SUCCESS(err)) {
NtClose(map->section); NtClose(map->section);
map->section = 0; map->section = 0;
map->address = nullptr; map->address = nullptr;
return ntstatus2errcode(rc); return ntstatus2errcode(err);
} }
assert(map->address != MAP_FAILED); assert(map->address != MAP_FAILED);
map->current = (size_t)SectionSize.QuadPart; map->current = (size_t)SectionSize.QuadPart;
map->length = ViewSize; map->limit = ViewSize;
return MDBX_SUCCESS;
#else #else
int err = mdbx_check4nonlocal(map->fd, flags);
if (unlikely(err != MDBX_SUCCESS))
return err;
(void)size;
map->address = mmap( map->address = mmap(
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
MAP_SHARED, map->fd, 0); MAP_SHARED, map->fd, 0);
if (unlikely(map->address == MAP_FAILED)) { if (unlikely(map->address == MAP_FAILED)) {
map->length = 0; map->limit = 0;
map->current = 0;
map->address = nullptr; map->address = nullptr;
return errno; return errno;
} }
map->length = limit; map->limit = limit;
#ifdef MADV_DONTFORK #ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->length, MADV_DONTFORK) != 0)) if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
return errno; return errno;
#endif #endif
#ifdef MADV_NOHUGEPAGE #ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->length, MADV_NOHUGEPAGE); (void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
#endif
#endif #endif
return MDBX_SUCCESS; return MDBX_SUCCESS;
#endif
} }
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) { MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
@ -1117,16 +1129,14 @@ MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address); NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
if (!NT_SUCCESS(rc)) if (!NT_SUCCESS(rc))
ntstatus2errcode(rc); ntstatus2errcode(rc);
#else
if (unlikely(munmap(map->address, map->limit)))
return errno;
#endif
map->length = 0; map->limit = 0;
map->current = 0; map->current = 0;
map->address = nullptr; map->address = nullptr;
#else
if (unlikely(munmap(map->address, map->length)))
return errno;
map->length = 0;
map->address = nullptr;
#endif
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -1134,13 +1144,13 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
size_t limit) { size_t limit) {
assert(size <= limit); assert(size <= limit);
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
assert(size != map->current || limit != map->length || size < map->filesize); assert(size != map->current || limit != map->limit || size < map->filesize);
NTSTATUS status; NTSTATUS status;
LARGE_INTEGER SectionSize; LARGE_INTEGER SectionSize;
int err, rc = MDBX_SUCCESS; int err, rc = MDBX_SUCCESS;
if (!(flags & MDBX_RDONLY) && limit == map->length && size > map->current) { if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current) {
/* growth rw-section */ /* growth rw-section */
SectionSize.QuadPart = size; SectionSize.QuadPart = size;
status = NtExtendSection(map->section, &SectionSize); status = NtExtendSection(map->section, &SectionSize);
@ -1152,10 +1162,10 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
return ntstatus2errcode(status); return ntstatus2errcode(status);
} }
if (limit > map->length) { if (limit > map->limit) {
/* check ability of address space for growth before unmap */ /* check ability of address space for growth before unmap */
PVOID BaseAddress = (PBYTE)map->address + map->length; PVOID BaseAddress = (PBYTE)map->address + map->limit;
SIZE_T RegionSize = limit - map->length; SIZE_T RegionSize = limit - map->limit;
status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0, status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0,
&RegionSize, MEM_RESERVE, PAGE_NOACCESS); &RegionSize, MEM_RESERVE, PAGE_NOACCESS);
if (!NT_SUCCESS(status)) if (!NT_SUCCESS(status))
@ -1185,7 +1195,7 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
err = ntstatus2errcode(status); err = ntstatus2errcode(status);
bailout: bailout:
map->address = NULL; map->address = NULL;
map->current = map->length = 0; map->current = map->limit = 0;
if (ReservedAddress) if (ReservedAddress)
(void)NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress, (void)NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress,
&ReservedSize, MEM_RELEASE); &ReservedSize, MEM_RELEASE);
@ -1268,12 +1278,12 @@ retry_mapview:;
NtClose(map->section); NtClose(map->section);
map->section = NULL; map->section = NULL;
if (map->address && (size != map->current || limit != map->length)) { if (map->address && (size != map->current || limit != map->limit)) {
/* try to remap with the previous size and limit, /* try to remap with the previous size and limit,
* but return MDBX_RESULT_TRUE on success */ * but return MDBX_RESULT_TRUE on success */
rc = MDBX_RESULT_TRUE; rc = MDBX_RESULT_TRUE;
size = map->current; size = map->current;
limit = map->length; limit = map->limit;
goto retry_file_and_section; goto retry_file_and_section;
} }
@ -1283,28 +1293,54 @@ retry_mapview:;
assert(map->address != MAP_FAILED); assert(map->address != MAP_FAILED);
map->current = (size_t)SectionSize.QuadPart; map->current = (size_t)SectionSize.QuadPart;
map->length = ViewSize; map->limit = ViewSize;
return rc;
#else #else
if (limit != map->length) {
uint64_t filesize;
int rc = mdbx_filesize(map->fd, &filesize);
if (rc != MDBX_SUCCESS)
return rc;
if (flags & MDBX_RDONLY) {
map->current = (filesize > limit) ? limit : (size_t)filesize;
if (map->current != size)
rc = MDBX_RESULT_TRUE;
} else if (filesize != size) {
rc = mdbx_ftruncate(map->fd, size);
if (rc != MDBX_SUCCESS)
return rc;
map->current = size;
}
if (limit != map->limit) {
#if defined(_GNU_SOURCE) && (defined(__linux__) || defined(__gnu_linux__)) #if defined(_GNU_SOURCE) && (defined(__linux__) || defined(__gnu_linux__))
void *ptr = mremap(map->address, map->length, limit, void *ptr = mremap(map->address, map->limit, limit,
/* LY: when changing the mapping size, the calling code /* LY: when changing the mapping size, the calling code
must guarantee the absence of competing threads, and must guarantee the absence of competing threads,
a willingness to accept another base address */ and a willingness to accept another base address */
MREMAP_MAYMOVE); MREMAP_MAYMOVE);
if (ptr == MAP_FAILED) { if (ptr == MAP_FAILED) {
int err = errno; rc = errno;
return (err == EAGAIN || err == ENOMEM) ? MDBX_RESULT_TRUE : err; return (rc == EAGAIN || rc == ENOMEM) ? MDBX_RESULT_TRUE : rc;
} }
map->address = ptr; map->address = ptr;
map->length = limit; map->limit = limit;
#ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
return errno;
#endif
#ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
#endif
#else #else
return MDBX_RESULT_TRUE; rc = MDBX_RESULT_TRUE;
#endif /* _GNU_SOURCE && __linux__ */ #endif /* _GNU_SOURCE && __linux__ */
} }
return (flags & MDBX_RDONLY) ? MDBX_SUCCESS : mdbx_ftruncate(map->fd, size);
#endif #endif
return rc;
} }
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/

View File

@ -622,18 +622,19 @@ typedef struct mdbx_mmap_param {
struct MDBX_lockinfo *lck; struct MDBX_lockinfo *lck;
}; };
mdbx_filehandle_t fd; mdbx_filehandle_t fd;
size_t length; /* mapping length, but NOT a size of file or DB */ size_t limit; /* mapping length, but NOT a size of file nor DB */
size_t current; /* mapped region size, i.e. the size of file and DB */
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
size_t current; /* mapped region size, e.g. file and DB */ uint64_t filesize /* in-process cache of a file size. */;
uint64_t filesize;
#endif #endif
#ifdef MDBX_OSAL_SECTION #ifdef MDBX_OSAL_SECTION
MDBX_OSAL_SECTION section; MDBX_OSAL_SECTION section;
#endif #endif
} mdbx_mmap_t; } mdbx_mmap_t;
MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t must, MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
size_t limit); const size_t must, const size_t limit,
const bool truncate);
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map); MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current, MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
size_t wanna); size_t wanna);