mdbx: implements remapping of the database file when it is possible.

Change-Id: Ida15ba1f396a33b2c6063e680dff612f39a9608f
This commit is contained in:
Leonid Yuriev 2020-07-06 16:23:52 +03:00
parent 2d0a5c42a9
commit 3351c1f869
4 changed files with 103 additions and 22 deletions

View File

@ -13,6 +13,8 @@ v0.8.2 2020-07-??:
  - Refined mode bits while auto-creating LCK-file.
- Avoids unnecessary database file re-mapping in case the geometry was changed by other process(es).
From the user's point of view, the MDBX_UNABLE_EXTEND_MAPSIZE error will now be returned less frequently and only when using the DB in the current process really requires it to be reopened.
- On-the-fly remapping of the database file was implemented.
Now remapping with a change of address is performed automatically if there are no dependent readers in the current process.
v0.8.1 2020-06-12:
  - Minor change versioning. The last number in the version now means the number of commits since last release/tag.

View File

@ -4658,7 +4658,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
const pgno_t size_pgno,
const pgno_t limit_pgno) {
const pgno_t limit_pgno, const bool implicit) {
if ((env->me_flags & MDBX_WRITEMAP) && *env->me_unsynced_pages) {
int err = mdbx_msync(&env->me_dxb_mmap, 0,
pgno_align2os_bytes(env, used_pgno), true);
@ -4711,16 +4711,40 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
mdbx_error("failed suspend-for-remap: errcode %d", rc);
goto bailout;
}
#else
const bool mapping_can_be_moved = !implicit;
#else /* Windows */
/* Acquire guard to avoid collision between read and write txns
* around env->me_dbgeo */
bool mapping_can_be_moved = false;
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current)
goto bailout;
#endif /* Windows */
if (limit_bytes != env->me_dxb_mmap.limit && env->me_lck && !implicit) {
rc = mdbx_rdt_lock(env) /* lock readers table until remap done */;
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
/* looking for readers from this process */
MDBX_lockinfo *const lck = env->me_lck;
const unsigned snap_nreaders = lck->mti_numreaders;
mapping_can_be_moved = true;
for (unsigned i = 0; i < snap_nreaders; ++i) {
if (lck->mti_readers[i].mr_pid == env->me_pid &&
lck->mti_readers[i].mr_tid != mdbx_thread_self()) {
/* the base address of the mapping can't be changed since
* the other reader thread from this process exists. */
mdbx_rdt_unlock(env);
mapping_can_be_moved = false;
break;
}
}
}
#endif /* ! Windows */
const size_t prev_size = env->me_dxb_mmap.current;
if (size_bytes < prev_size) {
@ -4758,7 +4782,8 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
*env->me_discarded_tail = size_pgno;
}
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes,
mapping_can_be_moved);
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
const int readahead = mdbx_is_readahead_reasonable(size_bytes, 0);
if (readahead == MDBX_RESULT_FALSE)
@ -4829,6 +4854,8 @@ bailout:
mdbx_free(suspended);
}
#else
if (env->me_lck && mapping_can_be_moved)
mdbx_rdt_unlock(env);
int err = mdbx_fastmutex_release(&env->me_remap_guard);
#endif /* Windows */
if (err != MDBX_SUCCESS) {
@ -4849,7 +4876,8 @@ static __cold int mdbx_mapresize_implicit(MDBX_env *env, const pgno_t used_pgno,
? limit_pgno
: /* The actual mapsize may be less since the geo.upper may be changed
by other process. So, avoids remapping until it necessary. */
mapped_pgno);
mapped_pgno,
true);
}
static int mdbx_meta_unsteady(MDBX_env *env, const txnid_t last_steady,
@ -6115,8 +6143,9 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
goto bailout;
}
rc = mdbx_mapresize_implicit(env, txn->mt_next_pgno, txn->mt_end_pgno,
txn->mt_geo.upper);
rc = mdbx_mapresize(env, txn->mt_next_pgno, txn->mt_end_pgno,
txn->mt_geo.upper,
(txn->mt_flags & MDBX_RDONLY) ? true : false);
if (rc != MDBX_SUCCESS)
goto bailout;
}
@ -9192,7 +9221,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
if (new_geo.now != current_geo->now ||
new_geo.upper != current_geo->upper) {
rc = mdbx_mapresize(env, current_geo->next, new_geo.now, new_geo.upper);
rc = mdbx_mapresize(env, current_geo->next, new_geo.now, new_geo.upper,
false);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
mdbx_assert(env, (head == nullptr) == inside_txn);

View File

@ -1403,7 +1403,7 @@ MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
}
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
size_t limit) {
size_t limit, const bool may_move) {
assert(size <= limit);
#if defined(_WIN32) || defined(_WIN64)
assert(size != map->current || limit != map->limit || size < map->filesize);
@ -1482,8 +1482,8 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
if (status != /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018)
goto bailout_ntstatus /* no way to recovery */;
/* assume we can change base address if mapping size changed or prev address
* couldn't be used */
if (may_move)
/* the base address could be changed */
map->address = NULL;
}
@ -1541,7 +1541,7 @@ retry_mapview:;
if (!NT_SUCCESS(status)) {
if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 &&
map->address) {
map->address && may_move) {
/* try remap at another base address */
map->address = NULL;
goto retry_mapview;
@ -1565,6 +1565,7 @@ retry_mapview:;
map->current = (size_t)SectionSize.QuadPart;
map->limit = ViewSize;
#else
uint64_t filesize = 0;
@ -1585,7 +1586,8 @@ retry_mapview:;
if (limit != map->limit) {
#if defined(MREMAP_MAYMOVE)
void *ptr = mremap(map->address, map->limit, limit, 0);
void *ptr =
mremap(map->address, map->limit, limit, may_move ? MREMAP_MAYMOVE : 0);
if (ptr == MAP_FAILED) {
rc = errno;
switch (rc) {
@ -1596,7 +1598,59 @@ retry_mapview:;
}
return rc;
}
#else
if (!may_move)
/* TODO: Perhaps here it is worth to implement suspend/resume threads
* and perform unmap/map as like for Windows. */
return MDBX_UNABLE_EXTEND_MAPSIZE;
if (unlikely(munmap(map->address, map->limit)))
return errno;
unsigned mmap_flags =
MAP_CONCEAL | MAP_SHARED | MAP_FILE |
(F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0);
#ifdef MAP_FIXED
if (!may_move)
mmap_flags |= MAP_FIXED;
#endif
void *ptr =
mmap(map->address, limit,
(flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
mmap_flags, map->fd, 0);
if (unlikely(ptr == MAP_FAILED)) {
ptr = mmap(map->address, map->limit,
(flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
mmap_flags, map->fd, 0);
if (unlikely(ptr == MAP_FAILED)) {
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will re-used by malloc or another mmaping.
* See https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
*/
ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
map->limit = 0;
map->current = 0;
map->address = nullptr;
return errno;
}
return MDBX_UNABLE_EXTEND_MAPSIZE;
}
#endif /* !MREMAP_MAYMOVE */
if (map->address != ptr) {
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will re-used by malloc or another mmaping.
* See https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
*/
ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
VALGRIND_MAKE_MEM_DEFINED(ptr, map->current);
ASAN_UNPOISON_MEMORY_REGION(ptr, map->current);
map->address = ptr;
}
map->limit = limit;
#ifdef MADV_DONTFORK
@ -1607,14 +1661,9 @@ retry_mapview:;
#ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
#endif /* MADV_NOHUGEPAGE */
#else /* MREMAP_MAYMOVE */
/* TODO: Perhaps here it is worth to implement suspend/resume threads
* and perform unmap/map as like for Windows. */
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
#endif /* !MREMAP_MAYMOVE */
}
#endif
return rc;
}

View File

@ -623,7 +623,7 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
const unsigned options);
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
size_t wanna);
size_t wanna, const bool may_move);
#if defined(_WIN32) || defined(_WIN64)
typedef struct {
unsigned limit, count;