mdbx: rework mmap-functions for osal.

- add 'length' and 'current' fields to the mmap-object;
- drop mdbx_mremap();
- do the remap on demand inside mdbx_mresize();
- add mdbx_mapresize(), which also re-creates Valgrind's region;
- call the resize on txn-begin.

Change-Id: I82780f92c4947804e3f14fb7cb71ee655382f9bb
This commit is contained in:
Leo Yuriev 2017-07-12 21:13:17 +03:00
parent 700ec68d06
commit 17e8429a29
9 changed files with 290 additions and 244 deletions

24
mdbx.h
View File

@ -448,18 +448,18 @@ typedef struct MDBX_envinfo {
uint64_t current; /* current datafile size */
uint64_t shrink; /* shrink theshold for datafile */
uint64_t grow; /* growth step for datafile */
} me_geo;
uint64_t me_mapsize; /* Size of the data memory map */
uint64_t me_last_pgno; /* ID of the last used page */
uint64_t me_recent_txnid; /* ID of the last committed transaction */
uint64_t me_latter_reader_txnid; /* ID of the last reader transaction */
uint64_t me_meta0_txnid, me_meta0_sign;
uint64_t me_meta1_txnid, me_meta1_sign;
uint64_t me_meta2_txnid, me_meta2_sign;
uint32_t me_maxreaders; /* max reader slots in the environment */
uint32_t me_numreaders; /* max reader slots used in the environment */
uint32_t me_dxb_pagesize; /* database pagesize */
uint32_t me_sys_pagesize; /* system pagesize */
} mi_geo;
uint64_t mi_mapsize; /* Size of the data memory map */
uint64_t mi_last_pgno; /* ID of the last used page */
uint64_t mi_recent_txnid; /* ID of the last committed transaction */
uint64_t mi_latter_reader_txnid; /* ID of the last reader transaction */
uint64_t mi_meta0_txnid, mi_meta0_sign;
uint64_t mi_meta1_txnid, mi_meta1_sign;
uint64_t mi_meta2_txnid, mi_meta2_sign;
uint32_t mi_maxreaders; /* max reader slots in the environment */
uint32_t mi_numreaders; /* max reader slots used in the environment */
uint32_t mi_dxb_pagesize; /* database pagesize */
uint32_t mi_sys_pagesize; /* system pagesize */
} MDBX_envinfo;
/* Return a string describing a given error code.

View File

@ -660,11 +660,12 @@ struct MDBX_env {
#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
size_t me_signature;
mdbx_mmap_t me_dxb_mmap; /* The main data file */
mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_map me_dxb_mmap.dxb
#define me_lck me_lck_mmap.lck
#define me_fd me_dxb_mmap.fd
#define me_mapsize me_dxb_mmap.length
mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
/* Failed to update the meta page. Probably an I/O error. */
#define MDBX_FATAL_ERROR UINT32_C(0x80000000)
@ -688,7 +689,6 @@ struct MDBX_env {
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
size_t me_mapsize; /* size of the data memory map */
MDBX_dbx *me_dbxs; /* array of static DB info */
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
unsigned *me_dbiseqs; /* array of dbi sequence numbers */
@ -719,6 +719,7 @@ struct MDBX_env {
#ifdef USE_VALGRIND
int me_valgrind_handle;
#endif
struct {
size_t lower; /* minimal size of datafile */
size_t upper; /* maximal size of datafile */

View File

@ -1537,6 +1537,54 @@ static void mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
txn->mt_dirtyroom--;
}
/* Resize the datafile and/or its memory mapping.
 *
 * Both page counts are converted to byte sizes (rounded up to
 * env->me_os_psize) and handed to mdbx_mresize().
 *
 * On success the new geometry is stored in env->me_dbgeo.now/upper and, when
 * env->me_txn0 exists, its mt_end_pgno is set to size_pgno. On failure the
 * error is logged and the stored geometry is left as it was.
 *
 * Under USE_VALGRIND the registered block is discarded and re-created
 * whenever the mapping address or length differs from what it was on entry,
 * regardless of the result code.
 *
 * env        - the environment whose data mapping is resized.
 * size_pgno  - requested present size, in pages.
 * limit_pgno - requested upper limit, in pages.
 *
 * Returns whatever mdbx_mresize() returned. */
static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
                          const pgno_t limit_pgno) {
#ifdef USE_VALGRIND
  /* Snapshot of the mapping on entry, used to detect a change afterwards. */
  const size_t prev_mapsize = env->me_mapsize;
  void *const prev_mapaddr = env->me_map;
#endif

  const size_t want_limit =
      mdbx_roundup2(pgno2bytes(env, limit_pgno), env->me_os_psize);
  const size_t want_size =
      mdbx_roundup2(pgno2bytes(env, size_pgno), env->me_os_psize);

  mdbx_info("resize datafile/mapping: "
            "present %" PRIuPTR " -> %" PRIuPTR ", "
            "limit %" PRIuPTR " -> %" PRIuPTR,
            env->me_dbgeo.now, want_size, env->me_dbgeo.upper, want_limit);

  /* The rounded byte sizes must map back to exactly the requested pages. */
  mdbx_assert(env, limit_bytes_ok(want_limit, want_size));
  mdbx_assert(env, bytes2pgno(env, want_size) == size_pgno);
  mdbx_assert(env, bytes2pgno(env, want_limit) == limit_pgno);

  const int rc =
      mdbx_mresize(env->me_flags, &env->me_dxb_mmap, want_size, want_limit);
  if (rc != MDBX_SUCCESS) {
    /* me_dbgeo still holds the previous values here, so the log shows the
     * attempted transition. */
    mdbx_error("failed resize datafile/mapping: "
               "present %" PRIuPTR " -> %" PRIuPTR ", "
               "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
               env->me_dbgeo.now, want_size, env->me_dbgeo.upper, want_limit,
               rc);
  } else {
    if (env->me_txn0)
      env->me_txn0->mt_end_pgno = size_pgno;
    env->me_dbgeo.now = want_size;
    env->me_dbgeo.upper = want_limit;
  }

#ifdef USE_VALGRIND
  const int mapping_changed =
      prev_mapsize != env->me_mapsize || prev_mapaddr != env->me_map;
  if (mapping_changed) {
    VALGRIND_DISCARD(env->me_valgrind_handle);
    env->me_valgrind_handle =
        env->me_mapsize
            ? VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx")
            : 0;
  }
#endif

  return rc;
}
/* Allocate page numbers and memory for writing. Maintain me_last_reclaimed,
* me_reclaimed_pglist and mt_next_pgno. Set MDBX_TXN_ERROR on failure.
*
@ -1835,36 +1883,24 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
}
}
if (rc == MDBX_MAP_FULL) {
if (rc == MDBX_MAP_FULL && next < head->mm_geo.upper) {
mdbx_assert(env, next > txn->mt_end_pgno);
if (unlikely(pgno2bytes(env, next) <= env->me_mapsize)) {
pgno_t growth_pgno = txn->mt_next_pgno + head->mm_geo.grow;
if (growth_pgno > MAX_PAGENO)
growth_pgno = MAX_PAGENO;
size_t growth_bytes =
mdbx_roundup2(pgno2bytes(env, growth_pgno), env->me_os_psize);
if (growth_bytes > env->me_mapsize)
growth_bytes = env->me_mapsize;
growth_pgno = bytes2pgno(env, growth_bytes);
mdbx_assert(env, growth_pgno <= head->mm_geo.upper);
mdbx_assert(env, growth_pgno > txn->mt_end_pgno);
mdbx_info("growth datafile to %" PRIaPGNO " pages (+%" PRIaPGNO
"), %" PRIuPTR " bytes",
growth_pgno, growth_pgno - txn->mt_end_pgno, growth_bytes);
pgno_t growth_pgno = bytes2pgno(
env,
mdbx_roundup2(pgno2bytes(env, txn->mt_next_pgno + head->mm_geo.grow),
env->me_os_psize));
if (growth_pgno > head->mm_geo.upper)
growth_pgno = head->mm_geo.upper;
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
growth_bytes);
if (rc == MDBX_SUCCESS) {
txn->mt_end_pgno = growth_pgno;
env->me_dbgeo.now = growth_bytes;
mdbx_info("try growth datafile to %" PRIaPGNO " pages (+%" PRIaPGNO ")",
growth_pgno, growth_pgno - txn->mt_end_pgno);
rc = mdbx_mapresize(env, growth_pgno, head->mm_geo.upper);
if (rc == MDBX_SUCCESS)
continue;
}
mdbx_error("error while growth datafile to %" PRIaPGNO
"pages (+%" PRIaPGNO "), %" PRIuPTR " bytes, errcode %d",
growth_pgno, growth_pgno - txn->mt_end_pgno, growth_bytes,
rc);
} else if (next < head->mm_geo.upper)
rc = MDBX_MAP_RESIZED;
mdbx_warning("unable growth datafile to %" PRIaPGNO "pages (+%" PRIaPGNO
"), errcode %d",
growth_pgno, growth_pgno - txn->mt_end_pgno, rc);
}
fail:
@ -2267,6 +2303,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
return MDBX_PANIC;
}
pgno_t upper_pgno = 0;
if (flags & MDBX_TXN_RDONLY) {
txn->mt_flags = MDBX_TXN_RDONLY;
MDBX_reader *r = txn->mt_ro_reader;
@ -2364,6 +2401,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
txn->mt_txnid = snap;
txn->mt_next_pgno = meta->mm_geo.next;
txn->mt_end_pgno = meta->mm_geo.now;
upper_pgno = meta->mm_geo.upper;
memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDBX_db));
txn->mt_canary = meta->mm_canary;
@ -2417,6 +2455,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
/* Moved to here to avoid a data race in read TXNs */
txn->mt_next_pgno = meta->mm_geo.next;
txn->mt_end_pgno = meta->mm_geo.now;
upper_pgno = meta->mm_geo.upper;
}
/* Setup db info */
@ -2433,9 +2472,18 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) {
mdbx_debug("environment had fatal error, must shutdown!");
rc = MDBX_PANIC;
} else if (unlikely(env->me_mapsize < pgno2bytes(env, txn->mt_next_pgno))) {
rc = MDBX_MAP_RESIZED;
} else {
const size_t size = pgno2bytes(env, txn->mt_end_pgno);
if (unlikely(size > env->me_mapsize)) {
if (upper_pgno > MAX_PAGENO ||
bytes2pgno(env, pgno2bytes(env, upper_pgno)) != upper_pgno) {
rc = MDBX_MAP_RESIZED;
goto bailout;
}
rc = mdbx_mapresize(env, txn->mt_end_pgno, upper_pgno);
if (rc != MDBX_SUCCESS)
goto bailout;
}
return MDBX_SUCCESS;
}
bailout:
@ -3801,10 +3849,8 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
/* Windows is unable shrinking a mapped file */
#else
/* LY: check conditions to shrink datafile */
pgno_t shrink_pgno_delta = 0;
const pgno_t shrink_pgno = pending->mm_geo.next /* + pending->mm_geo.grow */;
const size_t shrink_bytes =
mdbx_roundup2(pgno2bytes(env, shrink_pgno), env->me_os_psize);
size_t shrink_pgno_delta = 0;
if (pending->mm_geo.now > shrink_pgno && pending->mm_geo.shrink &&
unlikely(pending->mm_geo.now - pending->mm_geo.shrink >= shrink_pgno)) {
if (pending->mm_geo.now > shrink_pgno &&
@ -3965,12 +4011,9 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
/* Windows is unable shrinking a mapped file */
#else
/* LY: shrink datafile if needed */
if (shrink_pgno_delta) {
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
shrink_bytes);
if (rc == MDBX_SUCCESS)
env->me_dbgeo.now = shrink_bytes;
else if (rc != MDBX_RESULT_TRUE)
if (unlikely(shrink_pgno_delta)) {
rc = mdbx_mapresize(env, pending->mm_geo.now, pending->mm_geo.upper);
if (MDBX_IS_ERROR(rc))
goto fail;
}
#endif /* not a Windows */
@ -4079,7 +4122,7 @@ bailout:
static int __cold mdbx_env_map(MDBX_env *env, size_t usedsize) {
int rc = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
env->me_mapsize);
env->me_dbgeo.upper);
if (unlikely(rc != MDBX_SUCCESS)) {
env->me_map = NULL;
return rc;
@ -4304,7 +4347,7 @@ LIBMDBX_API int mdbx_env_set_geometry(MDBX_env *env, ssize_t size_lower,
if (bytes2pgno(env, shrink_threshold) > UINT16_MAX)
shrink_threshold = pgno2bytes(env, UINT16_MAX);
/* save params for future open/create */
/* save user's geo-params for future open/create */
env->me_dbgeo.lower = size_lower;
env->me_dbgeo.now = size_now;
env->me_dbgeo.upper = size_upper;
@ -4337,27 +4380,9 @@ LIBMDBX_API int mdbx_env_set_geometry(MDBX_env *env, ssize_t size_lower,
meta.mm_geo.shrink == bytes2pgno(env, env->me_dbgeo.shrink));
if (memcmp(&meta.mm_geo, &head->mm_geo, sizeof(meta.mm_geo))) {
if (meta.mm_geo.upper != head->mm_geo.upper) {
const size_t size =
mdbx_roundup2(pgno2bytes(env, meta.mm_geo.upper), env->me_os_psize);
rc = mdbx_mremap(env->me_flags, &env->me_dxb_mmap, env->me_mapsize,
size);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
env->me_mapsize = size;
#ifdef USE_VALGRIND
VALGRIND_DISCARD(env->me_valgrind_handle);
env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx");
#endif
}
if (meta.mm_geo.now != head->mm_geo.now) {
const size_t size =
mdbx_roundup2(pgno2bytes(env, meta.mm_geo.now), env->me_os_psize);
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap,
pgno2bytes(env, head->mm_geo.now), size);
if (meta.mm_geo.now != head->mm_geo.now ||
meta.mm_geo.upper != head->mm_geo.upper) {
rc = mdbx_mapresize(env, meta.mm_geo.now, meta.mm_geo.upper);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
}
@ -4503,7 +4528,6 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
meta.mm_geo.shrink = (uint16_t)bytes2pgno(env, env->me_dbgeo.shrink);
mdbx_ensure(env, meta.mm_geo.now >= meta.mm_geo.next);
}
env->me_mapsize = env->me_dbgeo.upper;
uint64_t filesize;
err = mdbx_filesize(env->me_fd, &filesize);
@ -4541,7 +4565,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
}
}
err = mdbx_env_map(env, env->me_mapsize);
err = mdbx_env_map(env, expected_bytes);
if (err)
return err;
@ -4997,7 +5021,7 @@ static void __cold mdbx_env_close0(MDBX_env *env) {
}
if (env->me_map) {
mdbx_munmap(&env->me_dxb_mmap, env->me_mapsize);
mdbx_munmap(&env->me_dxb_mmap);
#ifdef USE_VALGRIND
VALGRIND_DISCARD(env->me_valgrind_handle);
env->me_valgrind_handle = -1;
@ -5009,9 +5033,7 @@ static void __cold mdbx_env_close0(MDBX_env *env) {
}
if (env->me_lck) {
mdbx_munmap(&env->me_lck_mmap,
(env->me_maxreaders - 1) * sizeof(MDBX_reader) +
sizeof(MDBX_lockinfo));
mdbx_munmap(&env->me_lck_mmap);
env->me_lck = nullptr;
}
env->me_pid = 0;
@ -9607,44 +9629,44 @@ int __cold mdbx_env_info(MDBX_env *env, MDBX_envinfo *arg, size_t bytes) {
const MDBX_meta *meta;
do {
meta = mdbx_meta_head(env);
arg->me_recent_txnid = mdbx_meta_txnid_fluid(env, meta);
arg->me_meta0_txnid = mdbx_meta_txnid_fluid(env, meta0);
arg->me_meta0_sign = meta0->mm_datasync_sign;
arg->me_meta1_txnid = mdbx_meta_txnid_fluid(env, meta1);
arg->me_meta1_sign = meta1->mm_datasync_sign;
arg->me_meta2_txnid = mdbx_meta_txnid_fluid(env, meta2);
arg->me_meta2_sign = meta2->mm_datasync_sign;
arg->me_last_pgno = meta->mm_geo.next - 1;
arg->me_geo.lower = pgno2bytes(env, meta->mm_geo.lower);
arg->me_geo.upper = pgno2bytes(env, meta->mm_geo.upper);
arg->me_geo.current = pgno2bytes(env, meta->mm_geo.now);
arg->me_geo.shrink = pgno2bytes(env, meta->mm_geo.shrink);
arg->me_geo.grow = pgno2bytes(env, meta->mm_geo.grow);
arg->me_mapsize = env->me_mapsize;
arg->mi_recent_txnid = mdbx_meta_txnid_fluid(env, meta);
arg->mi_meta0_txnid = mdbx_meta_txnid_fluid(env, meta0);
arg->mi_meta0_sign = meta0->mm_datasync_sign;
arg->mi_meta1_txnid = mdbx_meta_txnid_fluid(env, meta1);
arg->mi_meta1_sign = meta1->mm_datasync_sign;
arg->mi_meta2_txnid = mdbx_meta_txnid_fluid(env, meta2);
arg->mi_meta2_sign = meta2->mm_datasync_sign;
arg->mi_last_pgno = meta->mm_geo.next - 1;
arg->mi_geo.lower = pgno2bytes(env, meta->mm_geo.lower);
arg->mi_geo.upper = pgno2bytes(env, meta->mm_geo.upper);
arg->mi_geo.current = pgno2bytes(env, meta->mm_geo.now);
arg->mi_geo.shrink = pgno2bytes(env, meta->mm_geo.shrink);
arg->mi_geo.grow = pgno2bytes(env, meta->mm_geo.grow);
arg->mi_mapsize = env->me_mapsize;
mdbx_compiler_barrier();
} while (unlikely(arg->me_meta0_txnid != mdbx_meta_txnid_fluid(env, meta0) ||
arg->me_meta0_sign != meta0->mm_datasync_sign ||
arg->me_meta1_txnid != mdbx_meta_txnid_fluid(env, meta1) ||
arg->me_meta1_sign != meta1->mm_datasync_sign ||
arg->me_meta2_txnid != mdbx_meta_txnid_fluid(env, meta2) ||
arg->me_meta2_sign != meta2->mm_datasync_sign ||
} while (unlikely(arg->mi_meta0_txnid != mdbx_meta_txnid_fluid(env, meta0) ||
arg->mi_meta0_sign != meta0->mm_datasync_sign ||
arg->mi_meta1_txnid != mdbx_meta_txnid_fluid(env, meta1) ||
arg->mi_meta1_sign != meta1->mm_datasync_sign ||
arg->mi_meta2_txnid != mdbx_meta_txnid_fluid(env, meta2) ||
arg->mi_meta2_sign != meta2->mm_datasync_sign ||
meta != mdbx_meta_head(env) ||
arg->me_recent_txnid != mdbx_meta_txnid_fluid(env, meta)));
arg->mi_recent_txnid != mdbx_meta_txnid_fluid(env, meta)));
arg->me_maxreaders = env->me_maxreaders;
arg->me_numreaders = env->me_lck ? env->me_lck->mti_numreaders : INT32_MAX;
arg->me_dxb_pagesize = env->me_psize;
arg->me_sys_pagesize = env->me_os_psize;
arg->mi_maxreaders = env->me_maxreaders;
arg->mi_numreaders = env->me_lck ? env->me_lck->mti_numreaders : INT32_MAX;
arg->mi_dxb_pagesize = env->me_psize;
arg->mi_sys_pagesize = env->me_os_psize;
arg->me_latter_reader_txnid = 0;
arg->mi_latter_reader_txnid = 0;
if (env->me_lck) {
MDBX_reader *r = env->me_lck->mti_readers;
arg->me_latter_reader_txnid = arg->me_recent_txnid;
for (unsigned i = 0; i < arg->me_numreaders; ++i) {
arg->mi_latter_reader_txnid = arg->mi_recent_txnid;
for (unsigned i = 0; i < arg->mi_numreaders; ++i) {
if (r[i].mr_pid) {
const txnid_t txnid = r[i].mr_txnid;
if (arg->me_latter_reader_txnid > txnid)
arg->me_latter_reader_txnid = txnid;
if (arg->mi_latter_reader_txnid > txnid)
arg->mi_latter_reader_txnid = txnid;
}
}
}
@ -10451,9 +10473,10 @@ int mdbx_txn_straggler(MDBX_txn *txn, int *percent)
return MDBX_THREAD_MISMATCH;
MDBX_env *env = txn->mt_env;
pgno_t maxpg = bytes2pgno(env, env->me_mapsize);
if (unlikely((txn->mt_flags & MDBX_RDONLY) == 0)) {
*percent = (int)((txn->mt_next_pgno * UINT64_C(100) + maxpg / 2) / maxpg);
*percent =
(int)((txn->mt_next_pgno * UINT64_C(100) + txn->mt_end_pgno / 2) /
txn->mt_end_pgno);
return -1;
}
@ -10462,8 +10485,10 @@ int mdbx_txn_straggler(MDBX_txn *txn, int *percent)
do {
meta = mdbx_meta_head(env);
recent = mdbx_meta_txnid_fluid(env, meta);
if (percent)
if (percent) {
const pgno_t maxpg = meta->mm_geo.now;
*percent = (int)((meta->mm_geo.next * UINT64_C(100) + maxpg / 2) / maxpg);
}
} while (unlikely(recent != mdbx_meta_txnid_fluid(env, meta)));
txnid_t lag = recent - txn->mt_ro_reader->mr_txnid;

View File

@ -773,9 +773,12 @@ int mdbx_msync(mdbx_mmap_t *map, size_t offset, size_t length, int async) {
#endif
}
int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t length, size_t limit) {
int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t must, size_t limit) {
assert(must <= limit);
#if defined(_WIN32) || defined(_WIN64)
map->section = 0;
map->length = 0;
map->current = 0;
map->section = NULL;
map->address = MAP_FAILED;
if (GetFileType(map->fd) != FILE_TYPE_DISK)
@ -870,11 +873,11 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t length, size_t limit) {
return ntstatus2errcode(rc);
map->address = NULL;
SIZE_T ViewSize = limit;
SIZE_T ViewSize = (flags & MDBX_RDONLY) ? must : limit;
rc = NtMapViewOfSection(
map->section, GetCurrentProcess(), &map->address,
/* ZeroBits */ 0,
/* CommitSize */ length,
/* CommitSize */ must,
/* SectionOffset */ NULL, &ViewSize,
/* InheritDisposition */ ViewUnmap,
/* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE,
@ -889,26 +892,40 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t length, size_t limit) {
}
assert(map->address != MAP_FAILED);
map->current = must;
map->length = ViewSize;
return MDBX_SUCCESS;
#else
(void)length;
(void)must;
map->address = mmap(
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
MAP_SHARED, map->fd, 0);
return (map->address != MAP_FAILED) ? MDBX_SUCCESS : errno;
if (likely(map->address != MAP_FAILED)) {
map->length = limit;
return MDBX_SUCCESS;
}
map->length = 0;
return errno;
#endif
}
int mdbx_munmap(mdbx_mmap_t *map, size_t length) {
int mdbx_munmap(mdbx_mmap_t *map) {
#if defined(_WIN32) || defined(_WIN64)
(void)length;
if (map->section)
NtClose(map->section);
NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
return NT_SUCCESS(rc) ? MDBX_SUCCESS : ntstatus2errcode(rc);
if (!NT_SUCCESS(rc))
ntstatus2errcode(rc);
map->length = 0;
map->current = 0;
map->address = nullptr;
#else
return (munmap(map->address, length) == 0) ? MDBX_SUCCESS : errno;
if (unlikely(munmap(map->address, map->length)))
return errno;
map->length = 0;
map->address = nullptr;
#endif
return MDBX_SUCCESS;
}
int mdbx_mlock(mdbx_mmap_t *map, size_t length) {
@ -919,45 +936,46 @@ int mdbx_mlock(mdbx_mmap_t *map, size_t length) {
#endif
}
int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current, size_t wanna) {
int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t must, size_t limit) {
assert(must <= limit);
#if defined(_WIN32) || defined(_WIN64)
if (wanna > current) {
/* growth */
uint8_t *ptr = (uint8_t *)map->address + current;
return (ptr == VirtualAlloc(ptr, wanna - current, MEM_COMMIT,
(flags & MDBX_WRITEMAP) ? PAGE_READWRITE
: PAGE_READONLY))
? MDBX_SUCCESS
: GetLastError();
}
/* Windows is unable shrinking a mapped file */
return MDBX_RESULT_TRUE;
#else
(void)flags;
(void)current;
return mdbx_ftruncate(map->fd, wanna);
#endif
}
int mdbx_mremap(int flags, mdbx_mmap_t *map, size_t old_limit,
size_t new_limit) {
#if defined(_WIN32) || defined(_WIN64)
(void)flags;
if (old_limit > new_limit) {
if (limit < map->length) {
/* Windows is unable shrinking a mapped section */
return ERROR_USER_MAPPED_FILE;
}
if (limit > map->length) {
/* extend */
LARGE_INTEGER new_size;
new_size.QuadPart = new_limit;
new_size.QuadPart = limit;
NTSTATUS rc = NtExtendSection(map->section, &new_size);
return NT_SUCCESS(rc) ? MDBX_SUCCESS : ntstatus2errcode(rc);
if (!NT_SUCCESS(rc))
return ntstatus2errcode(rc);
map->length = limit;
}
if (must < map->current) {
/* Windows is unable shrinking a mapped file */
return MDBX_RESULT_TRUE;
}
if (must > map->current) {
/* growth */
uint8_t *ptr = (uint8_t *)map->address + map->current;
if (ptr !=
VirtualAlloc(ptr, must - map->current, MEM_COMMIT,
(flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY))
return GetLastError();
map->current = must;
}
return MDBX_SUCCESS;
#else
(void)flags;
void *ptr = mremap(map->address, old_limit, new_limit, 0);
if (limit != map->length) {
void *ptr = mremap(map->address, map->length, limit, MREMAP_MAYMOVE);
if (ptr == MAP_FAILED)
return errno;
map->address = ptr;
return MDBX_SUCCESS;
map->length = limit;
}
return mdbx_ftruncate(map->fd, must);
#endif
}

View File

@ -444,17 +444,19 @@ typedef struct mdbx_mmap_param {
struct MDBX_lockinfo *lck;
};
mdbx_filehandle_t fd;
size_t length; /* mapping length, but NOT a size of file or DB */
#if defined(_WIN32) || defined(_WIN64)
size_t current; /* mapped region size, e.g. file and DB */
#endif
#ifdef MDBX_OSAL_SECTION
MDBX_OSAL_SECTION section;
#endif
} mdbx_mmap_t;
int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t length, size_t limit);
int mdbx_munmap(mdbx_mmap_t *map, size_t length);
int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t must, size_t limit);
int mdbx_munmap(mdbx_mmap_t *map);
int mdbx_mlock(mdbx_mmap_t *map, size_t length);
int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current, size_t wanna);
int mdbx_mremap(int flags, mdbx_mmap_t *map, size_t old_limit,
size_t new_limit);
int mdbx_msync(mdbx_mmap_t *map, size_t offset, size_t length, int async);
static __inline mdbx_pid_t mdbx_getpid(void) {

View File

@ -335,7 +335,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
if (key->iov_len != sizeof(txnid_t))
problem_add("entry", record_number, "wrong txn-id size",
"key-size %" PRIiPTR "", key->iov_len);
else if (txnid < 1 || txnid > envinfo.me_recent_txnid)
else if (txnid < 1 || txnid > envinfo.mi_recent_txnid)
problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN "", txnid);
if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t))
@ -352,14 +352,14 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
data->iov_len);
else {
freedb_pages += number;
if (envinfo.me_latter_reader_txnid > txnid)
if (envinfo.mi_latter_reader_txnid > txnid)
reclaimable_pages += number;
for (i = number, prev = 1; --i >= 0;) {
pg = iptr[i];
if (pg < NUM_METAS || pg > envinfo.me_last_pgno)
if (pg < NUM_METAS || pg > envinfo.mi_last_pgno)
problem_add("entry", record_number, "wrong idl entry",
"%u < %" PRIiPTR " < %" PRIiPTR "", NUM_METAS, pg,
envinfo.me_last_pgno);
envinfo.mi_last_pgno);
else if (pg <= prev) {
bad = " [bad sequence]";
problem_add("entry", record_number, "bad sequence",
@ -636,16 +636,16 @@ static __inline bool meta_eq(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b,
static __inline int meta_recent(const bool roolback2steady) {
if (meta_ot(envinfo.me_meta0_txnid, envinfo.me_meta0_sign,
envinfo.me_meta1_txnid, envinfo.me_meta1_sign, roolback2steady))
return meta_ot(envinfo.me_meta2_txnid, envinfo.me_meta2_sign,
envinfo.me_meta1_txnid, envinfo.me_meta1_sign,
if (meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, roolback2steady))
return meta_ot(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign,
envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign,
roolback2steady)
? 1
: 2;
return meta_ot(envinfo.me_meta0_txnid, envinfo.me_meta0_sign,
envinfo.me_meta2_txnid, envinfo.me_meta2_sign, roolback2steady)
return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, roolback2steady)
? 2
: 0;
}
@ -653,18 +653,18 @@ static __inline int meta_recent(const bool roolback2steady) {
static __inline int meta_tail(int head) {
if (head == 0)
return meta_ot(envinfo.me_meta1_txnid, envinfo.me_meta1_sign,
envinfo.me_meta2_txnid, envinfo.me_meta2_sign, true)
return meta_ot(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign,
envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true)
? 1
: 2;
if (head == 1)
return meta_ot(envinfo.me_meta0_txnid, envinfo.me_meta0_sign,
envinfo.me_meta2_txnid, envinfo.me_meta2_sign, true)
return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true)
? 0
: 2;
if (head == 2)
return meta_ot(envinfo.me_meta0_txnid, envinfo.me_meta0_sign,
envinfo.me_meta1_txnid, envinfo.me_meta1_sign, true)
return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, true)
? 0
: 1;
assert(false);
@ -698,10 +698,10 @@ void verbose_meta(int num, txnid_t txnid, uint64_t sign) {
if (stay)
print(", stay");
if (txnid > envinfo.me_recent_txnid &&
if (txnid > envinfo.mi_recent_txnid &&
(exclusive || (envflags & MDBX_RDONLY) == 0))
print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")",
txnid - envinfo.me_recent_txnid, txnid, envinfo.me_recent_txnid);
txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid);
print("\n");
}
@ -712,26 +712,26 @@ static int check_meta_head(bool steady) {
error(" - unexpected internal error (%s)\n",
steady ? "meta_steady_head" : "meta_weak_head");
case 0:
if (envinfo.me_meta0_txnid != envinfo.me_recent_txnid) {
if (envinfo.mi_meta0_txnid != envinfo.mi_recent_txnid) {
print(" - meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64
")\n",
0, envinfo.me_meta0_txnid, envinfo.me_recent_txnid);
0, envinfo.mi_meta0_txnid, envinfo.mi_recent_txnid);
return 1;
}
break;
case 1:
if (envinfo.me_meta1_txnid != envinfo.me_recent_txnid) {
if (envinfo.mi_meta1_txnid != envinfo.mi_recent_txnid) {
print(" - meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64
")\n",
1, envinfo.me_meta1_txnid, envinfo.me_recent_txnid);
1, envinfo.mi_meta1_txnid, envinfo.mi_recent_txnid);
return 1;
}
break;
case 2:
if (envinfo.me_meta2_txnid != envinfo.me_recent_txnid) {
if (envinfo.mi_meta2_txnid != envinfo.mi_recent_txnid) {
print(" - meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64
")\n",
2, envinfo.me_meta2_txnid, envinfo.me_recent_txnid);
2, envinfo.mi_meta2_txnid, envinfo.mi_recent_txnid);
return 1;
}
}
@ -890,50 +890,50 @@ int main(int argc, char *argv[]) {
goto bailout;
}
lastpgno = envinfo.me_last_pgno + 1;
lastpgno = envinfo.mi_last_pgno + 1;
errno = 0;
if (verbose) {
print(" - pagesize %u (%u system), max keysize %" PRIuPTR
", max readers %u\n",
envinfo.me_dxb_pagesize, envinfo.me_sys_pagesize, maxkeysize,
envinfo.me_maxreaders);
print_size(" - mapsize ", envinfo.me_mapsize, "\n");
if (envinfo.me_geo.lower == envinfo.me_geo.upper)
print_size(" - fixed datafile: ", envinfo.me_geo.current, "");
envinfo.mi_dxb_pagesize, envinfo.mi_sys_pagesize, maxkeysize,
envinfo.mi_maxreaders);
print_size(" - mapsize ", envinfo.mi_mapsize, "\n");
if (envinfo.mi_geo.lower == envinfo.mi_geo.upper)
print_size(" - fixed datafile: ", envinfo.mi_geo.current, "");
else {
print_size(" - dynamic datafile: ", envinfo.me_geo.lower, "");
print_size(" .. ", envinfo.me_geo.upper, ", ");
print_size("+", envinfo.me_geo.grow, ", ");
print_size("-", envinfo.me_geo.shrink, "\n");
print_size(" - current datafile: ", envinfo.me_geo.current, "");
print_size(" - dynamic datafile: ", envinfo.mi_geo.lower, "");
print_size(" .. ", envinfo.mi_geo.upper, ", ");
print_size("+", envinfo.mi_geo.grow, ", ");
print_size("-", envinfo.mi_geo.shrink, "\n");
print_size(" - current datafile: ", envinfo.mi_geo.current, "");
}
printf(", %" PRIu64 " pages\n",
envinfo.me_geo.current / envinfo.me_dxb_pagesize);
envinfo.mi_geo.current / envinfo.mi_dxb_pagesize);
print(" - transactions: recent %" PRIu64 ", latter reader %" PRIu64
", lag %" PRIi64 "\n",
envinfo.me_recent_txnid, envinfo.me_latter_reader_txnid,
envinfo.me_recent_txnid - envinfo.me_latter_reader_txnid);
envinfo.mi_recent_txnid, envinfo.mi_latter_reader_txnid,
envinfo.mi_recent_txnid - envinfo.mi_latter_reader_txnid);
verbose_meta(0, envinfo.me_meta0_txnid, envinfo.me_meta0_sign);
verbose_meta(1, envinfo.me_meta1_txnid, envinfo.me_meta1_sign);
verbose_meta(2, envinfo.me_meta2_txnid, envinfo.me_meta2_sign);
verbose_meta(0, envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign);
verbose_meta(1, envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign);
verbose_meta(2, envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign);
}
if (verbose)
print(" - performs check for meta-pages clashes\n");
if (meta_eq(envinfo.me_meta0_txnid, envinfo.me_meta0_sign,
envinfo.me_meta1_txnid, envinfo.me_meta1_sign)) {
if (meta_eq(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign,
envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign)) {
print(" - meta-%d and meta-%d are clashed\n", 0, 1);
++problems_meta;
}
if (meta_eq(envinfo.me_meta1_txnid, envinfo.me_meta1_sign,
envinfo.me_meta2_txnid, envinfo.me_meta2_sign)) {
if (meta_eq(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign,
envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign)) {
print(" - meta-%d and meta-%d are clashed\n", 1, 2);
++problems_meta;
}
if (meta_eq(envinfo.me_meta2_txnid, envinfo.me_meta2_sign,
envinfo.me_meta0_txnid, envinfo.me_meta0_sign)) {
if (meta_eq(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign,
envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign)) {
print(" - meta-%d and meta-%d are clashed\n", 2, 0);
++problems_meta;
}
@ -1042,15 +1042,15 @@ int main(int argc, char *argv[]) {
problems_freedb = process_db(FREE_DBI, "free", handle_freedb, false);
if (verbose) {
uint64_t value = envinfo.me_mapsize / envstat.ms_psize;
uint64_t value = envinfo.mi_mapsize / envstat.ms_psize;
double percent = value / 100.0;
print(" - pages info: %" PRIu64 " total", value);
value = envinfo.me_geo.current / envinfo.me_dxb_pagesize;
value = envinfo.mi_geo.current / envinfo.mi_dxb_pagesize;
print(", backed %" PRIu64 " (%.1f%%)", value, value / percent);
print(", allocated %" PRIu64 " (%.1f%%)", lastpgno, lastpgno / percent);
if (verbose > 1) {
value = envinfo.me_mapsize / envstat.ms_psize - lastpgno;
value = envinfo.mi_mapsize / envstat.ms_psize - lastpgno;
print(", remained %" PRIu64 " (%.1f%%)", value, value / percent);
value = lastpgno - freedb_pages;
@ -1066,7 +1066,7 @@ int main(int argc, char *argv[]) {
}
value =
envinfo.me_mapsize / envstat.ms_psize - lastpgno + reclaimable_pages;
envinfo.mi_mapsize / envstat.ms_psize - lastpgno + reclaimable_pages;
print(", available %" PRIu64 " (%.1f%%)\n", value, value / percent);
}

View File

@ -122,8 +122,8 @@ static int dumpit(MDBX_txn *txn, MDBX_dbi dbi, char *name) {
if (name)
printf("database=%s\n", name);
printf("type=btree\n");
printf("mapsize=%" PRIu64 "\n", info.me_mapsize);
printf("maxreaders=%u\n", info.me_maxreaders);
printf("mapsize=%" PRIu64 "\n", info.mi_mapsize);
printf("maxreaders=%u\n", info.mi_maxreaders);
for (i = 0; dbflags[i].bit; i++)
if (flags & dbflags[i].bit)

View File

@ -137,7 +137,7 @@ static void readhdr(void) {
if (ptr)
*ptr = '\0';
i = sscanf((char *)dbuf.iov_base + STRLENOF("mapsize="), "%" PRIu64 "",
&envinfo.me_mapsize);
&envinfo.mi_mapsize);
if (i != 1) {
fprintf(stderr, "%s: line %" PRIiPTR ": invalid mapsize %s\n", prog,
lineno, (char *)dbuf.iov_base + STRLENOF("mapsize="));
@ -150,7 +150,7 @@ static void readhdr(void) {
if (ptr)
*ptr = '\0';
i = sscanf((char *)dbuf.iov_base + STRLENOF("maxreaders="), "%u",
&envinfo.me_maxreaders);
&envinfo.mi_maxreaders);
if (i != 1) {
fprintf(stderr, "%s: line %" PRIiPTR ": invalid maxreaders %s\n", prog,
lineno, (char *)dbuf.iov_base + STRLENOF("maxreaders="));
@ -393,20 +393,20 @@ int main(int argc, char *argv[]) {
mdbx_env_set_maxdbs(env, 2);
if (envinfo.me_maxreaders)
mdbx_env_set_maxreaders(env, envinfo.me_maxreaders);
if (envinfo.mi_maxreaders)
mdbx_env_set_maxreaders(env, envinfo.mi_maxreaders);
if (envinfo.me_mapsize) {
if (envinfo.me_mapsize > SIZE_MAX) {
if (envinfo.mi_mapsize) {
if (envinfo.mi_mapsize > SIZE_MAX) {
fprintf(stderr, "mdbx_env_set_mapsize failed, error %d %s\n", rc,
mdbx_strerror(MDBX_TOO_LARGE));
return EXIT_FAILURE;
}
mdbx_env_set_mapsize(env, (size_t)envinfo.me_mapsize);
mdbx_env_set_mapsize(env, (size_t)envinfo.mi_mapsize);
}
#ifdef MDBX_FIXEDMAP
if (info.me_mapaddr)
if (info.mi_mapaddr)
envflags |= MDBX_FIXEDMAP;
#endif

View File

@ -157,29 +157,29 @@ int main(int argc, char *argv[]) {
(void)mdbx_env_info(env, &mei, sizeof(mei));
printf("Environment Info\n");
printf(" Pagesize: %u\n", mst.ms_psize);
if (mei.me_geo.lower != mei.me_geo.upper) {
if (mei.mi_geo.lower != mei.mi_geo.upper) {
printf(" Dynamic datafile: %" PRIu64 "..%" PRIu64 " bytes (+%" PRIu64
"/-%" PRIu64 "), %" PRIu64 "..%" PRIu64 " pages (+%" PRIu64
"/-%" PRIu64 ")\n",
mei.me_geo.lower, mei.me_geo.upper, mei.me_geo.grow,
mei.me_geo.shrink, mei.me_geo.lower / mst.ms_psize,
mei.me_geo.upper / mst.ms_psize, mei.me_geo.grow / mst.ms_psize,
mei.me_geo.shrink / mst.ms_psize);
mei.mi_geo.lower, mei.mi_geo.upper, mei.mi_geo.grow,
mei.mi_geo.shrink, mei.mi_geo.lower / mst.ms_psize,
mei.mi_geo.upper / mst.ms_psize, mei.mi_geo.grow / mst.ms_psize,
mei.mi_geo.shrink / mst.ms_psize);
printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n",
mei.me_geo.current, mei.me_geo.current / mst.ms_psize);
mei.mi_geo.current, mei.mi_geo.current / mst.ms_psize);
} else {
printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n",
mei.me_geo.current, mei.me_geo.current / mst.ms_psize);
mei.mi_geo.current, mei.mi_geo.current / mst.ms_psize);
}
printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n",
mei.me_mapsize, mei.me_mapsize / mst.ms_psize);
printf(" Number of pages used: %" PRIu64 "\n", mei.me_last_pgno + 1);
printf(" Last transaction ID: %" PRIu64 "\n", mei.me_recent_txnid);
mei.mi_mapsize, mei.mi_mapsize / mst.ms_psize);
printf(" Number of pages used: %" PRIu64 "\n", mei.mi_last_pgno + 1);
printf(" Last transaction ID: %" PRIu64 "\n", mei.mi_recent_txnid);
printf(" Tail transaction ID: %" PRIu64 " (%" PRIi64 ")\n",
mei.me_latter_reader_txnid,
mei.me_latter_reader_txnid - mei.me_recent_txnid);
printf(" Max readers: %u\n", mei.me_maxreaders);
printf(" Number of readers used: %u\n", mei.me_numreaders);
mei.mi_latter_reader_txnid,
mei.mi_latter_reader_txnid - mei.mi_recent_txnid);
printf(" Max readers: %u\n", mei.mi_maxreaders);
printf(" Number of readers used: %u\n", mei.mi_numreaders);
} else {
/* LY: zap warnings from gcc */
memset(&mst, 0, sizeof(mst));
@ -234,7 +234,7 @@ int main(int argc, char *argv[]) {
}
iptr = data.iov_base;
pages += *iptr;
if (envinfo && mei.me_latter_reader_txnid > *(size_t *)key.iov_base)
if (envinfo && mei.mi_latter_reader_txnid > *(size_t *)key.iov_base)
reclaimable += *iptr;
if (freinfo > 1) {
char *bad = "";
@ -268,18 +268,18 @@ int main(int argc, char *argv[]) {
}
mdbx_cursor_close(cursor);
if (envinfo) {
uint64_t value = mei.me_mapsize / mst.ms_psize;
uint64_t value = mei.mi_mapsize / mst.ms_psize;
double percent = value / 100.0;
printf("Page Allocation Info\n");
printf(" Max pages: %" PRIu64 " 100%%\n", value);
value = mei.me_last_pgno + 1;
value = mei.mi_last_pgno + 1;
printf(" Pages used: %" PRIu64 " %.1f%%\n", value, value / percent);
value = mei.me_mapsize / mst.ms_psize - (mei.me_last_pgno + 1);
value = mei.mi_mapsize / mst.ms_psize - (mei.mi_last_pgno + 1);
printf(" Remained: %" PRIu64 " %.1f%%\n", value, value / percent);
value = mei.me_last_pgno + 1 - pages;
value = mei.mi_last_pgno + 1 - pages;
printf(" Used now: %" PRIu64 " %.1f%%\n", value, value / percent);
value = pages;
@ -292,7 +292,7 @@ int main(int argc, char *argv[]) {
printf(" Reclaimable: %" PRIu64 " %.1f%%\n", value, value / percent);
value =
mei.me_mapsize / mst.ms_psize - (mei.me_last_pgno + 1) + reclaimable;
mei.mi_mapsize / mst.ms_psize - (mei.mi_last_pgno + 1) + reclaimable;
printf(" Available: %" PRIu64 " %.1f%%\n", value, value / percent);
} else
printf(" Free pages: %" PRIaPGNO "\n", pages);