mdbx: refactor me_dbgeo usage and osal-mmap/mresize().

Change-Id: I1f29c953abcbd4f2bab7ba52e7dd9da85ea48354
This commit is contained in:
Leonid Yuriev 2019-10-28 18:01:02 +03:00
parent b4729bd1d6
commit 415cb5f886
4 changed files with 190 additions and 167 deletions

View File

@ -3411,7 +3411,6 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
const size_t length, const bool enable) {
assert(length > 0);
mdbx_notice("readahead %s %u..%u", enable ? "ON" : "OFF",
bytes2pgno(env, offset), bytes2pgno(env, offset + length));
@ -3466,18 +3465,14 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
const pgno_t limit_pgno) {
#ifdef MDBX_USE_VALGRIND
const size_t prev_mapsize = env->me_mapsize;
void *const prev_mapaddr = env->me_map;
#endif
const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
mdbx_verbose("resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR,
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper, limit_bytes);
env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
limit_bytes);
mdbx_assert(env, limit_bytes >= size_bytes);
mdbx_assert(env, bytes2pgno(env, size_bytes) >= size_pgno);
@ -3491,9 +3486,9 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
mdbx_handle_array_t *suspended = NULL;
mdbx_handle_array_t array_onstack;
int rc = MDBX_SUCCESS;
if (limit_bytes == env->me_dxb_mmap.length &&
if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current &&
env->me_dxb_mmap.current == env->me_dxb_mmap.filesize)
size_bytes == env->me_dxb_mmap.filesize)
goto bailout;
/* 1) Windows allows only extending a read-write section, but not a
@ -3517,27 +3512,31 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
if (rc != MDBX_SUCCESS)
return rc;
if (limit_bytes == env->me_dxb_mmap.length && size_bytes == env->me_dbgeo.now)
if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current)
goto bailout;
#endif /* Windows */
if (size_bytes < env->me_dbgeo.now) {
const size_t prev_limit = env->me_dxb_mmap.limit;
const void *const prev_addr = env->me_map;
const size_t prev_size = env->me_dxb_mmap.current;
if (size_bytes < prev_size) {
mdbx_notice("resize-MADV_%s %u..%u",
(env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED",
size_pgno, bytes2pgno(env, env->me_dbgeo.now));
size_pgno, bytes2pgno(env, prev_size));
#if defined(MADV_REMOVE)
if ((env->me_flags & MDBX_WRITEMAP) == 0 ||
madvise(env->me_map + size_bytes, env->me_dbgeo.now - size_bytes,
madvise(env->me_map + size_bytes, prev_size - size_bytes,
MADV_REMOVE) != 0)
#endif
#if defined(MADV_DONTNEED)
(void)madvise(env->me_map + size_bytes, env->me_dbgeo.now - size_bytes,
(void)madvise(env->me_map + size_bytes, prev_size - size_bytes,
MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED)
(void)posix_madvise(env->me_map + size_bytes,
env->me_dbgeo.now - size_bytes, POSIX_MADV_DONTNEED);
(void)posix_madvise(env->me_map + size_bytes, prev_size - size_bytes,
POSIX_MADV_DONTNEED);
#elif defined(POSIX_FADV_DONTNEED)
(void)posix_fadvise(env->me_fd, size_bytes, env->me_dbgeo.now - size_bytes,
(void)posix_fadvise(env->me_fd, size_bytes, prev_size - size_bytes,
POSIX_FADV_DONTNEED);
#else
__noop();
@ -3548,24 +3547,25 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
const size_t readahead_offset =
(limit_bytes != env->me_dbgeo.upper
#if defined(_WIN32) || defined(_WIN64)
|| env->me_dbgeo.now > size_bytes
#endif /* Windows */
)
? 0 /* reassign readahead to the entire map
because it (likely) was remapped */
: env->me_dbgeo.now;
rc = mdbx_is_readahead_reasonable(size_bytes, 0);
if (rc == MDBX_RESULT_FALSE)
rc = mdbx_set_readahead(env, 0, size_bytes, false);
rc = mdbx_set_readahead(
env, 0, (size_bytes > prev_size) ? size_bytes : prev_size, false);
else if (rc == MDBX_RESULT_TRUE) {
rc = MDBX_SUCCESS;
if (size_bytes > readahead_offset) {
const size_t readahead_pivot =
(limit_bytes != prev_limit || env->me_dxb_mmap.address != prev_addr
#if defined(_WIN32) || defined(_WIN64)
|| prev_size > size_bytes
#endif /* Windows */
)
? 0 /* reassign readahead to the entire map
because it was remapped */
: prev_size;
if (size_bytes > readahead_pivot) {
*env->me_discarded_tail = size_pgno;
rc = mdbx_set_readahead(env, readahead_offset,
size_bytes - readahead_offset, true);
rc = mdbx_set_readahead(env, readahead_pivot,
size_bytes - readahead_pivot, true);
}
}
}
@ -3575,35 +3575,33 @@ bailout:
#if defined(_WIN32) || defined(_WIN64)
mdbx_assert(env, size_bytes == env->me_dxb_mmap.current);
mdbx_assert(env, size_bytes <= env->me_dxb_mmap.filesize);
mdbx_assert(env, limit_bytes == env->me_dxb_mmap.length);
#endif
env->me_dbgeo.now = size_bytes;
env->me_dbgeo.upper = limit_bytes;
mdbx_assert(env, limit_bytes == env->me_dxb_mmap.limit);
#endif /* Windows */
if (env->me_txn) {
mdbx_tassert(env->me_txn, size_pgno >= env->me_txn->mt_next_pgno);
env->me_txn->mt_end_pgno = env->me_txn0->mt_end_pgno = size_pgno;
}
#ifdef MDBX_USE_VALGRIND
if (prev_mapsize != env->me_mapsize || prev_mapaddr != env->me_map) {
if (prev_limit != env->me_dxb_mmap.limit || prev_addr != env->me_map) {
VALGRIND_DISCARD(env->me_valgrind_handle);
env->me_valgrind_handle = 0;
if (env->me_mapsize)
if (env->me_dxb_mmap.limit)
env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
}
#endif
#endif /* MDBX_USE_VALGRIND */
} else {
if (rc != MDBX_RESULT_TRUE) {
mdbx_error("failed resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper,
env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
limit_bytes, rc);
} else {
mdbx_notice("unable resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
env->me_dbgeo.now, size_bytes, env->me_dbgeo.upper,
env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
limit_bytes, rc);
}
if (!env->me_dxb_mmap.address) {
@ -4775,7 +4773,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
rc = MDBX_PANIC;
} else {
const size_t size = pgno2bytes(env, txn->mt_end_pgno);
if (unlikely(size > env->me_mapsize)) {
if (unlikely(size > env->me_dxb_mmap.limit)) {
if (txn->mt_geo.upper > MAX_PAGENO ||
bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) !=
txn->mt_geo.upper) {
@ -4797,7 +4795,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
}
#endif
} else {
env->me_dbgeo.now = size;
env->me_dxb_mmap.current = size;
}
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
mdbx_txn_valgrind(env, txn);
@ -7943,14 +7941,14 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
}
err = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
env->me_dbgeo.upper);
env->me_dbgeo.upper, lck_rc);
if (unlikely(err != MDBX_SUCCESS))
return err;
#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
const size_t meta_length = pgno2bytes(env, NUM_METAS);
(void)madvise(env->me_map, meta_length, MADV_DODUMP);
(void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length,
(void)madvise(env->me_map + meta_length, env->me_dxb_mmap.limit - meta_length,
(mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP
: MADV_DONTDUMP);
#endif
@ -7958,51 +7956,53 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
const size_t used_aligned2os_bytes =
roundup_powerof2(used_bytes, env->me_os_psize);
*env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
if (used_aligned2os_bytes < env->me_dbgeo.now) {
if (used_aligned2os_bytes < env->me_dxb_mmap.current) {
#if defined(MADV_REMOVE)
if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0) {
mdbx_notice("open-MADV_%s %u..%u", "REMOVE", *env->me_discarded_tail,
bytes2pgno(env, env->me_dbgeo.now));
bytes2pgno(env, env->me_dxb_mmap.current));
(void)madvise(env->me_map + used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes, MADV_REMOVE);
env->me_dxb_mmap.current - used_aligned2os_bytes,
MADV_REMOVE);
}
#endif /* MADV_REMOVE */
#if defined(MADV_DONTNEED)
mdbx_notice("open-MADV_%s %u..%u", "DONTNEED", *env->me_discarded_tail,
bytes2pgno(env, env->me_dbgeo.now));
bytes2pgno(env, env->me_dxb_mmap.current));
(void)madvise(env->me_map + used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes, MADV_DONTNEED);
env->me_dxb_mmap.current - used_aligned2os_bytes,
MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED)
(void)madvise(env->me_map + used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes,
env->me_dxb_mmap.current - used_aligned2os_bytes,
POSIX_MADV_DONTNEED);
#elif defined(POSIX_FADV_DONTNEED)
(void)posix_fadvise(env->me_fd, used_aligned2os_bytes,
env->me_dbgeo.now - used_aligned2os_bytes,
env->me_dxb_mmap.current - used_aligned2os_bytes,
POSIX_FADV_DONTNEED);
#endif /* MADV_DONTNEED */
}
#ifdef MDBX_USE_VALGRIND
env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
#endif
const bool readahead =
(env->me_flags & MDBX_NORDAHEAD) == 0 &&
mdbx_is_readahead_reasonable(env->me_dbgeo.now, 0) == MDBX_RESULT_TRUE;
err = mdbx_set_readahead(env, 0, env->me_dbgeo.now, readahead);
const bool readahead = (env->me_flags & MDBX_NORDAHEAD) == 0 &&
mdbx_is_readahead_reasonable(env->me_dxb_mmap.current,
0) == MDBX_RESULT_TRUE;
err = mdbx_set_readahead(env, 0, env->me_dxb_mmap.current, readahead);
if (err != MDBX_SUCCESS)
return err;
mdbx_assert(env, used_bytes >= pgno2bytes(env, NUM_METAS) &&
used_bytes <= env->me_mapsize);
used_bytes <= env->me_dxb_mmap.limit);
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
VALGRIND_MAKE_MEM_NOACCESS(env->me_map + used_bytes,
env->me_mapsize - used_bytes);
env->me_dxb_mmap.limit - used_bytes);
ASAN_POISON_MEMORY_REGION(env->me_map + used_bytes,
env->me_mapsize - used_bytes);
env->me_poison_edge = bytes2pgno(env, env->me_mapsize);
env->me_dxb_mmap.limit - used_bytes);
env->me_poison_edge = bytes2pgno(env, env->me_dxb_mmap.limit);
#endif /* MDBX_USE_VALGRIND */
/* NOTE: AddressSanitizer (at least GCC 7.x, 8.x) could generate
@ -8097,29 +8097,19 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
const MDBX_meta *head = mdbx_meta_head(env);
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
/* re-check file size after mmap */
uint64_t filesize_after_mmap;
err = mdbx_filesize(env->me_fd, &filesize_after_mmap);
if (unlikely(err != MDBX_SUCCESS))
return err;
if (filesize_after_mmap != expected_bytes) {
if (filesize_after_mmap != filesize_before_mmap)
mdbx_verbose("datafile resized by system to %" PRIu64 " bytes",
filesize_after_mmap);
if (filesize_after_mmap % env->me_os_psize ||
filesize_after_mmap > env->me_dbgeo.upper ||
filesize_after_mmap < used_bytes) {
mdbx_error("unacceptable/unexpected datafile size %" PRIu64,
filesize_after_mmap);
return MDBX_PROBLEM;
}
if ((env->me_flags & MDBX_RDONLY) == 0) {
meta.mm_geo.now =
bytes2pgno(env, env->me_dbgeo.now = (size_t)filesize_after_mmap);
mdbx_verbose("update meta-geo to filesize %" PRIuPTR
" bytes, %" PRIaPGNO " pages",
env->me_dbgeo.now, meta.mm_geo.now);
}
/* re-check size after mmap */
if ((env->me_dxb_mmap.current & (env->me_os_psize - 1)) != 0 ||
env->me_dxb_mmap.current < used_bytes) {
mdbx_error("unacceptable/unexpected datafile size %" PRIuPTR,
env->me_dxb_mmap.current);
return MDBX_PROBLEM;
}
if (env->me_dxb_mmap.current != expected_bytes &&
(env->me_flags & MDBX_RDONLY) == 0) {
meta.mm_geo.now = bytes2pgno(env, env->me_dxb_mmap.current);
mdbx_verbose("update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO
" pages",
env->me_dxb_mmap.current, meta.mm_geo.now);
}
if (memcmp(&meta.mm_geo, &head->mm_geo, sizeof(meta.mm_geo))) {
@ -8248,30 +8238,23 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
goto bailout;
if (lck_seize_rc == MDBX_RESULT_TRUE) {
uint64_t wanna = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) +
sizeof(MDBX_lockinfo),
env->me_os_psize);
size = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) +
sizeof(MDBX_lockinfo),
env->me_os_psize);
#ifndef NDEBUG
err = mdbx_ftruncate(env->me_lfd, size = 0);
err = mdbx_ftruncate(env->me_lfd, 0);
if (unlikely(err != MDBX_SUCCESS))
goto bailout;
#endif
mdbx_jitter4testing(false);
if (size != wanna) {
err = mdbx_ftruncate(env->me_lfd, wanna);
if (unlikely(err != MDBX_SUCCESS))
goto bailout;
size = wanna;
}
} else {
if (env->me_flags & MDBX_EXCLUSIVE) {
err = MDBX_BUSY;
goto bailout;
}
if (size > PTRDIFF_MAX || (size & (env->me_os_psize - 1)) ||
if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 ||
size < env->me_os_psize) {
mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size);
mdbx_error("lck-file has invalid size %" PRIu64 " bytes", size);
err = MDBX_PROBLEM;
goto bailout;
}
@ -8286,7 +8269,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
}
env->me_maxreaders = (unsigned)maxreaders;
err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size);
err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size,
lck_seize_rc);
if (unlikely(err != MDBX_SUCCESS))
goto bailout;
@ -14185,7 +14169,7 @@ int __cold mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
unsynced_pages = *env->me_unsynced_pages +
(*env->me_meta_sync_txnid != (uint32_t)arg->mi_last_pgno);
arg->mi_mapsize = env->me_mapsize;
arg->mi_mapsize = env->me_dxb_mmap.limit;
mdbx_compiler_barrier();
if (likely(arg->mi_meta0_txnid == mdbx_meta_txnid_fluid(env, meta0) &&
arg->mi_meta0_sign == meta0->mm_datasync_sign &&
@ -16167,7 +16151,7 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
return mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1) ? MDBX_RESULT_TRUE
: MDBX_RESULT_FALSE;
}
if ((size_t)offset < env->me_mapsize) {
if ((size_t)offset < env->me_dxb_mmap.limit) {
/* Указатель адресует что-то в пределах mmap, но за границей
* распределенных страниц. Такое может случится если mdbx_is_dirty()
* вызывает после операции, в ходе которой гразная страница попала

View File

@ -940,7 +940,6 @@ struct MDBX_env {
mdbx_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb
#define me_fd me_dxb_mmap.fd
#define me_mapsize me_dxb_mmap.length
mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
@ -1016,13 +1015,16 @@ struct MDBX_env {
#endif
MDBX_env *me_lcklist_next;
/* struct me_dbgeo used for accepting db-geo params from user for the new
* database creation, i.e. when mdbx_env_set_geometry() was called before
* mdbx_env_open(). */
struct {
size_t lower; /* minimal size of datafile */
size_t upper; /* maximal size of datafile */
size_t now; /* current size of datafile */
size_t grow; /* step to grow datafile */
size_t shrink; /* threshold to shrink datafile */
} me_dbgeo; /* */
} me_dbgeo;
#if defined(_WIN32) || defined(_WIN64)
MDBX_srwlock me_remap_guard;

View File

@ -1020,33 +1020,47 @@ MDBX_INTERNAL_FUNC int mdbx_check4nonlocal(mdbx_filehandle_t handle,
return MDBX_SUCCESS;
}
MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
size_t limit) {
MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
const size_t size, const size_t limit,
const bool truncate) {
assert(size <= limit);
#if defined(_WIN32) || defined(_WIN64)
map->length = 0;
map->limit = 0;
map->current = 0;
map->section = NULL;
map->address = nullptr;
#if defined(_WIN32) || defined(_WIN64)
map->section = NULL;
map->filesize = 0;
#endif /* Windows */
NTSTATUS rc = mdbx_check4nonlocal(map->fd, flags);
if (rc != MDBX_SUCCESS)
return rc;
int err = mdbx_check4nonlocal(map->fd, flags);
if (unlikely(err != MDBX_SUCCESS))
return err;
rc = mdbx_filesize(map->fd, &map->filesize);
if (rc != MDBX_SUCCESS)
return rc;
if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) {
rc = mdbx_ftruncate(map->fd, size);
if (rc == MDBX_SUCCESS)
map->filesize = size;
/* ignore error, because Windows unable shrink file
* that already mapped (by another process) */
if ((flags & MDBX_RDONLY) == 0 && truncate) {
err = mdbx_ftruncate(map->fd, size);
if (err != MDBX_SUCCESS)
return err;
#if defined(_WIN32) || defined(_WIN64)
map->filesize = size;
#else
map->current = size;
#endif
} else {
uint64_t filesize;
err = mdbx_filesize(map->fd, &filesize);
if (err != MDBX_SUCCESS)
return err;
#if defined(_WIN32) || defined(_WIN64)
map->filesize = filesize;
#else
map->current = (filesize > limit) ? limit : (size_t)filesize;
#endif
}
#if defined(_WIN32) || defined(_WIN64)
LARGE_INTEGER SectionSize;
SectionSize.QuadPart = size;
rc = NtCreateSection(
err = NtCreateSection(
&map->section,
/* DesiredAccess */
(flags & MDBX_WRITEMAP)
@ -1057,11 +1071,11 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
/* SectionPageProtection */
(flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE,
/* AllocationAttributes */ SEC_RESERVE, map->fd);
if (!NT_SUCCESS(rc))
return ntstatus2errcode(rc);
if (!NT_SUCCESS(err))
return ntstatus2errcode(err);
SIZE_T ViewSize = (flags & MDBX_RDONLY) ? 0 : limit;
rc = NtMapViewOfSection(
err = NtMapViewOfSection(
map->section, GetCurrentProcess(), &map->address,
/* ZeroBits */ 0,
/* CommitSize */ 0,
@ -1070,44 +1084,42 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size,
/* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE,
/* Win32Protect */
(flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY);
if (!NT_SUCCESS(rc)) {
if (!NT_SUCCESS(err)) {
NtClose(map->section);
map->section = 0;
map->address = nullptr;
return ntstatus2errcode(rc);
return ntstatus2errcode(err);
}
assert(map->address != MAP_FAILED);
map->current = (size_t)SectionSize.QuadPart;
map->length = ViewSize;
return MDBX_SUCCESS;
map->limit = ViewSize;
#else
int err = mdbx_check4nonlocal(map->fd, flags);
if (unlikely(err != MDBX_SUCCESS))
return err;
(void)size;
map->address = mmap(
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
MAP_SHARED, map->fd, 0);
if (unlikely(map->address == MAP_FAILED)) {
map->length = 0;
map->limit = 0;
map->current = 0;
map->address = nullptr;
return errno;
}
map->length = limit;
map->limit = limit;
#ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->length, MADV_DONTFORK) != 0))
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
return errno;
#endif
#ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->length, MADV_NOHUGEPAGE);
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
#endif
#endif
return MDBX_SUCCESS;
#endif
}
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
@ -1117,16 +1129,14 @@ MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
if (!NT_SUCCESS(rc))
ntstatus2errcode(rc);
#else
if (unlikely(munmap(map->address, map->limit)))
return errno;
#endif
map->length = 0;
map->limit = 0;
map->current = 0;
map->address = nullptr;
#else
if (unlikely(munmap(map->address, map->length)))
return errno;
map->length = 0;
map->address = nullptr;
#endif
return MDBX_SUCCESS;
}
@ -1134,13 +1144,13 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
size_t limit) {
assert(size <= limit);
#if defined(_WIN32) || defined(_WIN64)
assert(size != map->current || limit != map->length || size < map->filesize);
assert(size != map->current || limit != map->limit || size < map->filesize);
NTSTATUS status;
LARGE_INTEGER SectionSize;
int err, rc = MDBX_SUCCESS;
if (!(flags & MDBX_RDONLY) && limit == map->length && size > map->current) {
if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current) {
/* growth rw-section */
SectionSize.QuadPart = size;
status = NtExtendSection(map->section, &SectionSize);
@ -1152,10 +1162,10 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
return ntstatus2errcode(status);
}
if (limit > map->length) {
if (limit > map->limit) {
/* check ability of address space for growth before unmap */
PVOID BaseAddress = (PBYTE)map->address + map->length;
SIZE_T RegionSize = limit - map->length;
PVOID BaseAddress = (PBYTE)map->address + map->limit;
SIZE_T RegionSize = limit - map->limit;
status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0,
&RegionSize, MEM_RESERVE, PAGE_NOACCESS);
if (!NT_SUCCESS(status))
@ -1185,7 +1195,7 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
err = ntstatus2errcode(status);
bailout:
map->address = NULL;
map->current = map->length = 0;
map->current = map->limit = 0;
if (ReservedAddress)
(void)NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress,
&ReservedSize, MEM_RELEASE);
@ -1268,12 +1278,12 @@ retry_mapview:;
NtClose(map->section);
map->section = NULL;
if (map->address && (size != map->current || limit != map->length)) {
if (map->address && (size != map->current || limit != map->limit)) {
/* try to remap with the previous size and limit,
* but return MDBX_RESULT_TRUE on success */
rc = MDBX_RESULT_TRUE;
size = map->current;
limit = map->length;
limit = map->limit;
goto retry_file_and_section;
}
@ -1283,28 +1293,54 @@ retry_mapview:;
assert(map->address != MAP_FAILED);
map->current = (size_t)SectionSize.QuadPart;
map->length = ViewSize;
return rc;
map->limit = ViewSize;
#else
if (limit != map->length) {
uint64_t filesize;
int rc = mdbx_filesize(map->fd, &filesize);
if (rc != MDBX_SUCCESS)
return rc;
if (flags & MDBX_RDONLY) {
map->current = (filesize > limit) ? limit : (size_t)filesize;
if (map->current != size)
rc = MDBX_RESULT_TRUE;
} else if (filesize != size) {
rc = mdbx_ftruncate(map->fd, size);
if (rc != MDBX_SUCCESS)
return rc;
map->current = size;
}
if (limit != map->limit) {
#if defined(_GNU_SOURCE) && (defined(__linux__) || defined(__gnu_linux__))
void *ptr = mremap(map->address, map->length, limit,
void *ptr = mremap(map->address, map->limit, limit,
/* LY: in case changing the mapping size calling code
must guarantees the absence of competing threads, and
a willingness to another base address */
must guarantee the absence of competing threads,
and be prepared for a different base address */
MREMAP_MAYMOVE);
if (ptr == MAP_FAILED) {
int err = errno;
return (err == EAGAIN || err == ENOMEM) ? MDBX_RESULT_TRUE : err;
rc = errno;
return (rc == EAGAIN || rc == ENOMEM) ? MDBX_RESULT_TRUE : rc;
}
map->address = ptr;
map->length = limit;
map->limit = limit;
#ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
return errno;
#endif
#ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
#endif
#else
return MDBX_RESULT_TRUE;
rc = MDBX_RESULT_TRUE;
#endif /* _GNU_SOURCE && __linux__ */
}
return (flags & MDBX_RDONLY) ? MDBX_SUCCESS : mdbx_ftruncate(map->fd, size);
#endif
return rc;
}
/*----------------------------------------------------------------------------*/

View File

@ -622,18 +622,19 @@ typedef struct mdbx_mmap_param {
struct MDBX_lockinfo *lck;
};
mdbx_filehandle_t fd;
size_t length; /* mapping length, but NOT a size of file or DB */
size_t limit; /* mapping length, but NOT a size of file nor DB */
size_t current; /* mapped region size, i.e. the size of file and DB */
#if defined(_WIN32) || defined(_WIN64)
size_t current; /* mapped region size, e.g. file and DB */
uint64_t filesize;
uint64_t filesize /* in-process cache of a file size. */;
#endif
#ifdef MDBX_OSAL_SECTION
MDBX_OSAL_SECTION section;
#endif
} mdbx_mmap_t;
MDBX_INTERNAL_FUNC int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t must,
size_t limit);
MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
const size_t must, const size_t limit,
const bool truncate);
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
size_t wanna);