mdbx: misc refinements (minor).

Change-Id: Ia58e2d75e320bbd47dd352e082cf6f41afde075f
This commit is contained in:
Leo Yuriev 2017-07-26 12:23:01 +03:00
parent 1b09605a06
commit 88ca23caa5
3 changed files with 72 additions and 36 deletions

View File

@ -970,7 +970,7 @@ static __inline unsigned mdbx_log2(size_t value) {
/* The percentage of space used in the page, in tenths of a percent. */ /* The percentage of space used in the page, in tenths of a percent. */
#define PAGEFILL(env, p) \ #define PAGEFILL(env, p) \
(1024L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \ (1024UL * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \
((env)->me_psize - PAGEHDRSZ)) ((env)->me_psize - PAGEHDRSZ))
/* The minimum page fill factor, in tenths of a percent. /* The minimum page fill factor, in tenths of a percent.
* Pages emptier than this are candidates for merging. */ * Pages emptier than this are candidates for merging. */

View File

@ -1908,8 +1908,7 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
} }
if (rc == MDBX_MAP_FULL && oldest < txn->mt_txnid - 1) { if (rc == MDBX_MAP_FULL && oldest < txn->mt_txnid - 1) {
txnid_t snap = mdbx_oomkick(env, oldest); if (mdbx_oomkick(env, oldest) > oldest)
if (snap > oldest)
continue; continue;
} }
} }
@ -2043,6 +2042,7 @@ static int mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp, MDBX_page **ret) {
else else
mdbx_page_copy(np, mp, env->me_psize); mdbx_page_copy(np, mp, env->me_psize);
} }
mdbx_debug("unspill page %" PRIaPGNO, mp->mp_pgno);
if (tx2 == txn) { if (tx2 == txn) {
/* If in current txn, this page is no longer spilled. /* If in current txn, this page is no longer spilled.
* If it happens to be the last page, truncate the spill list. * If it happens to be the last page, truncate the spill list.
@ -2076,6 +2076,7 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
pgno_t pgno; pgno_t pgno;
int rc; int rc;
mdbx_cassert(mc, !IS_OVERFLOW(mp));
if (!F_ISSET(mp->mp_flags, P_DIRTY)) { if (!F_ISSET(mp->mp_flags, P_DIRTY)) {
if (txn->mt_flags & MDBX_TXN_SPILLS) { if (txn->mt_flags & MDBX_TXN_SPILLS) {
np = NULL; np = NULL;
@ -2085,6 +2086,7 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
if (likely(np)) if (likely(np))
goto done; goto done;
} }
if (unlikely((rc = mdbx_midl_need(&txn->mt_befree_pages, 1)) || if (unlikely((rc = mdbx_midl_need(&txn->mt_befree_pages, 1)) ||
(rc = mdbx_page_alloc(mc, 1, &np, MDBX_ALLOC_ALL)))) (rc = mdbx_page_alloc(mc, 1, &np, MDBX_ALLOC_ALL))))
goto fail; goto fail;
@ -2104,8 +2106,7 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
} else if (txn->mt_parent && !IS_SUBP(mp)) { } else if (txn->mt_parent && !IS_SUBP(mp)) {
MDBX_ID2 mid, *dl = txn->mt_rw_dirtylist; MDBX_ID2 mid, *dl = txn->mt_rw_dirtylist;
pgno = mp->mp_pgno; pgno = mp->mp_pgno;
/* If txn has a parent, make sure the page is in our /* If txn has a parent, make sure the page is in our dirty list. */
* dirty list. */
if (dl[0].mid) { if (dl[0].mid) {
unsigned x = mdbx_mid2l_search(dl, pgno); unsigned x = mdbx_mid2l_search(dl, pgno);
if (x <= dl[0].mid && dl[x].mid == pgno) { if (x <= dl[0].mid && dl[x].mid == pgno) {
@ -2117,9 +2118,11 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
return MDBX_PROBLEM; return MDBX_PROBLEM;
} }
return 0; return MDBX_SUCCESS;
} }
} }
mdbx_debug("clone db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno);
mdbx_cassert(mc, dl[0].mid < MDBX_IDL_UM_MAX); mdbx_cassert(mc, dl[0].mid < MDBX_IDL_UM_MAX);
/* No - copy it */ /* No - copy it */
np = mdbx_page_malloc(txn, 1); np = mdbx_page_malloc(txn, 1);
@ -2130,7 +2133,7 @@ static int mdbx_page_touch(MDBX_cursor *mc) {
rc = mdbx_mid2l_insert(dl, &mid); rc = mdbx_mid2l_insert(dl, &mid);
mdbx_cassert(mc, rc == 0); mdbx_cassert(mc, rc == 0);
} else { } else {
return 0; return MDBX_SUCCESS;
} }
mdbx_page_copy(np, mp, txn->mt_env->me_psize); mdbx_page_copy(np, mp, txn->mt_env->me_psize);
@ -2162,7 +2165,7 @@ done:
} }
} }
} }
return 0; return MDBX_SUCCESS;
fail: fail:
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
@ -2204,14 +2207,13 @@ int mdbx_env_sync(MDBX_env *env, int force) {
pgno2bytes(env, 16 /* FIXME: define threshold */) && pgno2bytes(env, 16 /* FIXME: define threshold */) &&
(flags & MDBX_NOSYNC) == 0) { (flags & MDBX_NOSYNC) == 0) {
assert(((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); assert(((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
const size_t used_size = const size_t usedbytes = pgno_align2os_bytes(env, head->mm_geo.next);
mdbx_roundup2(pgno2bytes(env, head->mm_geo.next), env->me_os_psize);
mdbx_txn_unlock(env); mdbx_txn_unlock(env);
/* LY: pre-sync without holding lock to reduce latency for writer(s) */ /* LY: pre-sync without holding lock to reduce latency for writer(s) */
int rc = (flags & MDBX_WRITEMAP) int rc = (flags & MDBX_WRITEMAP)
? mdbx_msync(&env->me_dxb_mmap, 0, used_size, ? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes,
flags & MDBX_MAPASYNC) flags & MDBX_MAPASYNC)
: mdbx_filesync(env->me_fd, false); : mdbx_filesync(env->me_fd, false);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
@ -3673,6 +3675,16 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) {
continue; continue;
} }
mdbx_debug("read meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO
", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
" +%u -%u, txn_id %" PRIaTXN ", %s",
page.mp_pgno, page.mp_meta.mm_dbs[MAIN_DBI].md_root,
page.mp_meta.mm_dbs[FREE_DBI].md_root, page.mp_meta.mm_geo.lower,
page.mp_meta.mm_geo.next, page.mp_meta.mm_geo.now,
page.mp_meta.mm_geo.upper, page.mp_meta.mm_geo.grow,
page.mp_meta.mm_geo.shrink, page.mp_meta.mm_txnid_a,
mdbx_durable_str(&page.mp_meta));
/* LY: check min-pages value */ /* LY: check min-pages value */
if (page.mp_meta.mm_geo.lower < MIN_PAGENO || if (page.mp_meta.mm_geo.lower < MIN_PAGENO ||
page.mp_meta.mm_geo.lower > MAX_PAGENO) { page.mp_meta.mm_geo.lower > MAX_PAGENO) {
@ -3790,6 +3802,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) {
*meta = page.mp_meta; *meta = page.mp_meta;
if (META_IS_WEAK(meta)) if (META_IS_WEAK(meta))
loop_limit += 1; /* LY: should re-read to hush race with update */ loop_limit += 1; /* LY: should re-read to hush race with update */
mdbx_info("latch meta[%u]", meta_number);
} }
} }
@ -3876,8 +3889,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_assert(env, !META_IS_STEADY(head) || env->me_sync_pending != 0); mdbx_assert(env, !META_IS_STEADY(head) || env->me_sync_pending != 0);
mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now); mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now);
const size_t usedbytes = const size_t usedbytes = pgno_align2os_bytes(env, pending->mm_geo.next);
mdbx_roundup2(pgno2bytes(env, pending->mm_geo.next), env->me_os_psize);
if (env->me_sync_threshold && env->me_sync_pending >= env->me_sync_threshold) if (env->me_sync_threshold && env->me_sync_pending >= env->me_sync_threshold)
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED;
@ -4278,13 +4290,13 @@ LIBMDBX_API int mdbx_env_set_geometry(MDBX_env *env, ssize_t size_lower,
if (shrink_threshold < 0) if (shrink_threshold < 0)
shrink_threshold = pgno2bytes(env, head->mm_geo.shrink); shrink_threshold = pgno2bytes(env, head->mm_geo.shrink);
const size_t used_size = pgno2bytes(env, head->mm_geo.next); const size_t usedbytes = pgno2bytes(env, head->mm_geo.next);
if ((size_t)size_upper < used_size) { if ((size_t)size_upper < usedbytes) {
rc = MDBX_MAP_FULL; rc = MDBX_MAP_FULL;
goto bailout; goto bailout;
} }
if ((size_t)size_now < used_size) if ((size_t)size_now < usedbytes)
size_now = used_size; size_now = usedbytes;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if ((size_t)size_now < env->me_dbgeo.now || if ((size_t)size_now < env->me_dbgeo.now ||
(size_t)size_upper < env->me_dbgeo.upper) { (size_t)size_upper < env->me_dbgeo.upper) {
@ -4546,6 +4558,14 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
#endif #endif
} }
mdbx_info("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO
"/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
" +%u -%u, txn_id %" PRIaTXN ", %s",
meta.mm_dbs[MAIN_DBI].md_root, meta.mm_dbs[FREE_DBI].md_root,
meta.mm_geo.lower, meta.mm_geo.next, meta.mm_geo.now,
meta.mm_geo.upper, meta.mm_geo.grow, meta.mm_geo.shrink,
meta.mm_txnid_a, mdbx_durable_str(&meta));
mdbx_setup_pagesize(env, meta.mm_psize); mdbx_setup_pagesize(env, meta.mm_psize);
if ((env->me_flags & MDBX_RDONLY) /* readonly */ if ((env->me_flags & MDBX_RDONLY) /* readonly */
|| lck_rc != MDBX_RESULT_TRUE /* not exclusive */) { || lck_rc != MDBX_RESULT_TRUE /* not exclusive */) {
@ -4600,8 +4620,16 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
meta.mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper); meta.mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper);
meta.mm_geo.grow = (uint16_t)bytes2pgno(env, env->me_dbgeo.grow); meta.mm_geo.grow = (uint16_t)bytes2pgno(env, env->me_dbgeo.grow);
meta.mm_geo.shrink = (uint16_t)bytes2pgno(env, env->me_dbgeo.shrink); meta.mm_geo.shrink = (uint16_t)bytes2pgno(env, env->me_dbgeo.shrink);
mdbx_ensure(env, meta.mm_geo.now >= meta.mm_geo.next);
mdbx_info("amended: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO
"/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
" +%u -%u, txn_id %" PRIaTXN ", %s",
meta.mm_dbs[MAIN_DBI].md_root, meta.mm_dbs[FREE_DBI].md_root,
meta.mm_geo.lower, meta.mm_geo.next, meta.mm_geo.now,
meta.mm_geo.upper, meta.mm_geo.grow, meta.mm_geo.shrink,
meta.mm_txnid_a, mdbx_durable_str(&meta));
} }
mdbx_ensure(env, meta.mm_geo.now >= meta.mm_geo.next);
} }
uint64_t filesize_before_mmap; uint64_t filesize_before_mmap;
@ -4609,17 +4637,22 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
const size_t expected_bytes = pgno2bytes(env, meta.mm_geo.now); const size_t expected_bytes =
mdbx_roundup2(pgno2bytes(env, meta.mm_geo.now), env->me_os_psize);
const size_t used_bytes = pgno2bytes(env, meta.mm_geo.next); const size_t used_bytes = pgno2bytes(env, meta.mm_geo.next);
mdbx_ensure(env, expected_bytes >= used_bytes); mdbx_ensure(env, expected_bytes >= used_bytes);
if (filesize_before_mmap != expected_bytes) { if (filesize_before_mmap != expected_bytes) {
if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) { if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
mdbx_info("filesize mismatch (expect %" PRIuPTR ", have %" PRIu64 "), " mdbx_info("filesize mismatch (expect %" PRIuPTR "/%" PRIaPGNO
", have %" PRIu64 "/%" PRIaPGNO "), "
"assume collision in non-exclusive mode", "assume collision in non-exclusive mode",
expected_bytes, filesize_before_mmap); expected_bytes, bytes2pgno(env, expected_bytes),
filesize_before_mmap, bytes2pgno(env, filesize_before_mmap));
} else { } else {
mdbx_notice("filesize mismatch (expect %" PRIuPTR ", have %" PRIu64 ")", mdbx_notice("filesize mismatch (expect %" PRIuPTR "/%" PRIaPGNO
expected_bytes, filesize_before_mmap); ", have %" PRIu64 "/%" PRIaPGNO ")",
expected_bytes, bytes2pgno(env, expected_bytes),
filesize_before_mmap, bytes2pgno(env, filesize_before_mmap));
if (filesize_before_mmap < used_bytes) { if (filesize_before_mmap < used_bytes) {
mdbx_error("last-page beyond end-of-file (last %" PRIaPGNO mdbx_error("last-page beyond end-of-file (last %" PRIaPGNO
", have %" PRIaPGNO ")", ", have %" PRIaPGNO ")",
@ -4631,11 +4664,13 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
if (env->me_flags & MDBX_RDONLY) { if (env->me_flags & MDBX_RDONLY) {
mdbx_notice("ignore filesize mismatch in readonly-mode"); mdbx_notice("ignore filesize mismatch in readonly-mode");
} else { } else {
mdbx_info("resize datafile to %" PRIu64 " bytes", expected_bytes); mdbx_info("resize datafile to %" PRIu64 " bytes, %" PRIaPGNO " pages",
expected_bytes, bytes2pgno(env, expected_bytes));
err = mdbx_ftruncate(env->me_fd, expected_bytes); err = mdbx_ftruncate(env->me_fd, expected_bytes);
if (unlikely(err != MDBX_SUCCESS)) { if (unlikely(err != MDBX_SUCCESS)) {
mdbx_error("error %d, while resize datafile to %" PRIu64 " bytes", rc, mdbx_error("error %d, while resize datafile to %" PRIu64
expected_bytes); " bytes, %" PRIaPGNO " pages",
rc, expected_bytes, bytes2pgno(env, expected_bytes));
return err; return err;
} }
filesize_before_mmap = expected_bytes; filesize_before_mmap = expected_bytes;
@ -7146,7 +7181,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, unsigned flags) {
return rc; return rc;
rc = mdbx_cursor_touch(mc); rc = mdbx_cursor_touch(mc);
if (unlikely(rc)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
@ -8450,8 +8485,9 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
} }
} }
} else { } else {
mdbx_debug("root page %" PRIaPGNO " doesn't need rebalancing", mdbx_debug("root page %" PRIaPGNO
mp->mp_pgno); " doesn't need rebalancing (flags 0x%x)",
mp->mp_pgno, mp->mp_flags);
} }
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -8661,7 +8697,7 @@ static int mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
flags |= MDBX_NODUPDATA; flags |= MDBX_NODUPDATA;
} }
rc = mdbx_cursor_set(&mc, key, data, op, &exact); rc = mdbx_cursor_set(&mc, key, data, op, &exact);
if (likely(rc == 0)) { if (likely(rc == MDBX_SUCCESS)) {
/* let mdbx_page_split know about this cursor if needed: /* let mdbx_page_split know about this cursor if needed:
* delete will trigger a rebalance; if it needs to move * delete will trigger a rebalance; if it needs to move
* a node from one page to another, it will have to * a node from one page to another, it will have to
@ -11128,8 +11164,8 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
* во время транзакции) если адрес находится внутри mmap-диапазона * во время транзакции) если адрес находится внутри mmap-диапазона
* и в заголовке страницы нет флажка P_DIRTY. */ * и в заголовке страницы нет флажка P_DIRTY. */
if (env->me_map < (uint8_t *)page) { if (env->me_map < (uint8_t *)page) {
const size_t used_size = pgno2bytes(env, txn->mt_next_pgno); const size_t usedbytes = pgno2bytes(env, txn->mt_next_pgno);
if ((uint8_t *)page < env->me_map + used_size) { if ((uint8_t *)page < env->me_map + usedbytes) {
/* страница внутри диапазона, смотрим на флажки */ /* страница внутри диапазона, смотрим на флажки */
return (page->mp_flags & (P_DIRTY | P_LOOSE | P_KEEP)) return (page->mp_flags & (P_DIRTY | P_LOOSE | P_KEEP))
? MDBX_RESULT_TRUE ? MDBX_RESULT_TRUE

View File

@ -208,13 +208,13 @@ int mdbx_asprintf(char **strp, const char *fmt, ...) {
va_end(ap); va_end(ap);
if (unlikely(needed < 0 || needed >= INT_MAX)) { if (unlikely(needed < 0 || needed >= INT_MAX)) {
*strp = NULL; *strp = nullptr;
va_end(ones); va_end(ones);
return needed; return needed;
} }
*strp = malloc(needed + 1); *strp = malloc(needed + 1);
if (unlikely(*strp == NULL)) { if (unlikely(*strp == nullptr)) {
va_end(ones); va_end(ones);
SetLastError(MDBX_ENOMEM); SetLastError(MDBX_ENOMEM);
return -1; return -1;
@ -231,7 +231,7 @@ int mdbx_asprintf(char **strp, const char *fmt, ...) {
assert(actual == needed); assert(actual == needed);
if (unlikely(actual < 0)) { if (unlikely(actual < 0)) {
free(*strp); free(*strp);
*strp = NULL; *strp = nullptr;
} }
return actual; return actual;
} }
@ -246,7 +246,7 @@ int mdbx_memalign_alloc(size_t alignment, size_t bytes, void **result) {
*result = memalign(alignment, bytes); *result = memalign(alignment, bytes);
return *result ? MDBX_SUCCESS : errno; return *result ? MDBX_SUCCESS : errno;
#elif _POSIX_VERSION >= 200112L #elif _POSIX_VERSION >= 200112L
*result = NULL; *result = nullptr;
return posix_memalign(result, alignment, bytes); return posix_memalign(result, alignment, bytes);
#else #else
#error FIXME #error FIXME