mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-02 00:54:14 +08:00
mdbx: implements remapping of the database file when it is possible.
Change-Id: Ida15ba1f396a33b2c6063e680dff612f39a9608f
This commit is contained in:
parent
2d0a5c42a9
commit
3351c1f869
@ -13,6 +13,8 @@ v0.8.2 2020-07-??:
|
||||
- Refined mode bits while auto-creating LCK-file.
|
||||
- Avoids unnecessary remapping of the database file when the geometry has been changed by other process(es).
|
||||
From the user's point of view, the MDBX_UNABLE_EXTEND_MAPSIZE error will now be returned less frequently and only when using the DB in the current process really requires it to be reopened.
|
||||
- On-the-fly remapping of the database file was implemented.
|
||||
Now remapping with a change of address is performed automatically if there are no dependent readers in the current process.
|
||||
|
||||
v0.8.1 2020-06-12:
|
||||
- Minor change versioning. The last number in the version now means the number of commits since last release/tag.
|
||||
|
46
src/core.c
46
src/core.c
@ -4658,7 +4658,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
|
||||
|
||||
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
||||
const pgno_t size_pgno,
|
||||
const pgno_t limit_pgno) {
|
||||
const pgno_t limit_pgno, const bool implicit) {
|
||||
if ((env->me_flags & MDBX_WRITEMAP) && *env->me_unsynced_pages) {
|
||||
int err = mdbx_msync(&env->me_dxb_mmap, 0,
|
||||
pgno_align2os_bytes(env, used_pgno), true);
|
||||
@ -4711,16 +4711,40 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
||||
mdbx_error("failed suspend-for-remap: errcode %d", rc);
|
||||
goto bailout;
|
||||
}
|
||||
#else
|
||||
const bool mapping_can_be_moved = !implicit;
|
||||
#else /* Windows */
|
||||
/* Acquire guard to avoid collision between read and write txns
|
||||
* around env->me_dbgeo */
|
||||
bool mapping_can_be_moved = false;
|
||||
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
if (limit_bytes == env->me_dxb_mmap.limit &&
|
||||
size_bytes == env->me_dxb_mmap.current)
|
||||
goto bailout;
|
||||
#endif /* Windows */
|
||||
|
||||
if (limit_bytes != env->me_dxb_mmap.limit && env->me_lck && !implicit) {
|
||||
rc = mdbx_rdt_lock(env) /* lock readers table until remap done */;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
/* looking for readers from this process */
|
||||
MDBX_lockinfo *const lck = env->me_lck;
|
||||
const unsigned snap_nreaders = lck->mti_numreaders;
|
||||
mapping_can_be_moved = true;
|
||||
for (unsigned i = 0; i < snap_nreaders; ++i) {
|
||||
if (lck->mti_readers[i].mr_pid == env->me_pid &&
|
||||
lck->mti_readers[i].mr_tid != mdbx_thread_self()) {
|
||||
/* the base address of the mapping can't be changed since
|
||||
* the other reader thread from this process exists. */
|
||||
mdbx_rdt_unlock(env);
|
||||
mapping_can_be_moved = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ! Windows */
|
||||
|
||||
const size_t prev_size = env->me_dxb_mmap.current;
|
||||
if (size_bytes < prev_size) {
|
||||
@ -4758,7 +4782,8 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
||||
*env->me_discarded_tail = size_pgno;
|
||||
}
|
||||
|
||||
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
|
||||
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes,
|
||||
mapping_can_be_moved);
|
||||
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
|
||||
const int readahead = mdbx_is_readahead_reasonable(size_bytes, 0);
|
||||
if (readahead == MDBX_RESULT_FALSE)
|
||||
@ -4829,6 +4854,8 @@ bailout:
|
||||
mdbx_free(suspended);
|
||||
}
|
||||
#else
|
||||
if (env->me_lck && mapping_can_be_moved)
|
||||
mdbx_rdt_unlock(env);
|
||||
int err = mdbx_fastmutex_release(&env->me_remap_guard);
|
||||
#endif /* Windows */
|
||||
if (err != MDBX_SUCCESS) {
|
||||
@ -4849,7 +4876,8 @@ static __cold int mdbx_mapresize_implicit(MDBX_env *env, const pgno_t used_pgno,
|
||||
? limit_pgno
|
||||
: /* The actual mapsize may be less since the geo.upper may be changed
|
||||
by other process. So, avoids remapping until it necessary. */
|
||||
mapped_pgno);
|
||||
mapped_pgno,
|
||||
true);
|
||||
}
|
||||
|
||||
static int mdbx_meta_unsteady(MDBX_env *env, const txnid_t last_steady,
|
||||
@ -6115,8 +6143,9 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
||||
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||
goto bailout;
|
||||
}
|
||||
rc = mdbx_mapresize_implicit(env, txn->mt_next_pgno, txn->mt_end_pgno,
|
||||
txn->mt_geo.upper);
|
||||
rc = mdbx_mapresize(env, txn->mt_next_pgno, txn->mt_end_pgno,
|
||||
txn->mt_geo.upper,
|
||||
(txn->mt_flags & MDBX_RDONLY) ? true : false);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
goto bailout;
|
||||
}
|
||||
@ -9192,7 +9221,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
||||
|
||||
if (new_geo.now != current_geo->now ||
|
||||
new_geo.upper != current_geo->upper) {
|
||||
rc = mdbx_mapresize(env, current_geo->next, new_geo.now, new_geo.upper);
|
||||
rc = mdbx_mapresize(env, current_geo->next, new_geo.now, new_geo.upper,
|
||||
false);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
mdbx_assert(env, (head == nullptr) == inside_txn);
|
||||
|
75
src/osal.c
75
src/osal.c
@ -1403,7 +1403,7 @@ MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
|
||||
}
|
||||
|
||||
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
||||
size_t limit) {
|
||||
size_t limit, const bool may_move) {
|
||||
assert(size <= limit);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
assert(size != map->current || limit != map->limit || size < map->filesize);
|
||||
@ -1482,9 +1482,9 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
||||
if (status != /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018)
|
||||
goto bailout_ntstatus /* no way to recovery */;
|
||||
|
||||
/* assume we can change base address if mapping size changed or prev address
|
||||
* couldn't be used */
|
||||
map->address = NULL;
|
||||
if (may_move)
|
||||
/* the base address could be changed */
|
||||
map->address = NULL;
|
||||
}
|
||||
|
||||
retry_file_and_section:
|
||||
@ -1541,7 +1541,7 @@ retry_mapview:;
|
||||
|
||||
if (!NT_SUCCESS(status)) {
|
||||
if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 &&
|
||||
map->address) {
|
||||
map->address && may_move) {
|
||||
/* try remap at another base address */
|
||||
map->address = NULL;
|
||||
goto retry_mapview;
|
||||
@ -1565,6 +1565,7 @@ retry_mapview:;
|
||||
|
||||
map->current = (size_t)SectionSize.QuadPart;
|
||||
map->limit = ViewSize;
|
||||
|
||||
#else
|
||||
|
||||
uint64_t filesize = 0;
|
||||
@ -1585,7 +1586,8 @@ retry_mapview:;
|
||||
|
||||
if (limit != map->limit) {
|
||||
#if defined(MREMAP_MAYMOVE)
|
||||
void *ptr = mremap(map->address, map->limit, limit, 0);
|
||||
void *ptr =
|
||||
mremap(map->address, map->limit, limit, may_move ? MREMAP_MAYMOVE : 0);
|
||||
if (ptr == MAP_FAILED) {
|
||||
rc = errno;
|
||||
switch (rc) {
|
||||
@ -1596,7 +1598,59 @@ retry_mapview:;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
map->address = ptr;
|
||||
#else
|
||||
if (!may_move)
|
||||
/* TODO: Perhaps here it is worth to implement suspend/resume threads
|
||||
* and perform unmap/map as like for Windows. */
|
||||
return MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||
|
||||
if (unlikely(munmap(map->address, map->limit)))
|
||||
return errno;
|
||||
|
||||
unsigned mmap_flags =
|
||||
MAP_CONCEAL | MAP_SHARED | MAP_FILE |
|
||||
(F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0);
|
||||
#ifdef MAP_FIXED
|
||||
if (!may_move)
|
||||
mmap_flags |= MAP_FIXED;
|
||||
#endif
|
||||
|
||||
void *ptr =
|
||||
mmap(map->address, limit,
|
||||
(flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
||||
mmap_flags, map->fd, 0);
|
||||
if (unlikely(ptr == MAP_FAILED)) {
|
||||
ptr = mmap(map->address, map->limit,
|
||||
(flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
||||
mmap_flags, map->fd, 0);
|
||||
if (unlikely(ptr == MAP_FAILED)) {
|
||||
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
|
||||
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
||||
* when this memory will re-used by malloc or another mmaping.
|
||||
* See https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
|
||||
*/
|
||||
ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
|
||||
map->limit = 0;
|
||||
map->current = 0;
|
||||
map->address = nullptr;
|
||||
return errno;
|
||||
}
|
||||
return MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||
}
|
||||
#endif /* !MREMAP_MAYMOVE */
|
||||
|
||||
if (map->address != ptr) {
|
||||
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
|
||||
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
||||
* when this memory will re-used by malloc or another mmaping.
|
||||
* See https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
|
||||
*/
|
||||
ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
|
||||
|
||||
VALGRIND_MAKE_MEM_DEFINED(ptr, map->current);
|
||||
ASAN_UNPOISON_MEMORY_REGION(ptr, map->current);
|
||||
map->address = ptr;
|
||||
}
|
||||
map->limit = limit;
|
||||
|
||||
#ifdef MADV_DONTFORK
|
||||
@ -1607,14 +1661,9 @@ retry_mapview:;
|
||||
#ifdef MADV_NOHUGEPAGE
|
||||
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
|
||||
#endif /* MADV_NOHUGEPAGE */
|
||||
|
||||
#else /* MREMAP_MAYMOVE */
|
||||
/* TODO: Perhaps here it is worth to implement suspend/resume threads
|
||||
* and perform unmap/map as like for Windows. */
|
||||
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||
#endif /* !MREMAP_MAYMOVE */
|
||||
}
|
||||
#endif
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -623,7 +623,7 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
|
||||
const unsigned options);
|
||||
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
|
||||
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
|
||||
size_t wanna);
|
||||
size_t wanna, const bool may_move);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
typedef struct {
|
||||
unsigned limit, count;
|
||||
|
Loading…
x
Reference in New Issue
Block a user