mdbx: Merge branch 'master' into stable/0.1

Change-Id: I7460e6a8b42c9bbeadfdf20d326c31c1d4a98969
This commit is contained in:
Leo Yuriev 2018-06-01 16:42:48 +03:00
commit 17d3e7190c

View File

@ -982,7 +982,8 @@ static int __must_check_result mdbx_page_split(MDBX_cursor *mc,
MDBX_val *newdata,
pgno_t newpgno, unsigned nflags);
static int __must_check_result mdbx_read_header(MDBX_env *env, MDBX_meta *meta);
static int __must_check_result mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
uint64_t *filesize);
static int __must_check_result mdbx_sync_locked(MDBX_env *env, unsigned flags,
MDBX_meta *const pending);
static void mdbx_env_close0(MDBX_env *env);
@ -4270,11 +4271,17 @@ fail:
/* Read the environment parameters of a DB environment
* before mapping it into memory. */
static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) {
static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
uint64_t *filesize) {
assert(offsetof(MDBX_page, mp_meta) == PAGEHDRSZ);
int rc = mdbx_filesize(env->me_fd, filesize);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
memset(meta, 0, sizeof(MDBX_meta));
meta->mm_datasync_sign = MDBX_DATASIGN_WEAK;
int rc = MDBX_CORRUPTED;
rc = MDBX_CORRUPTED;
/* Read twice all meta pages so we can find the latest one. */
unsigned loop_limit = NUM_METAS * 2;
@ -4410,6 +4417,17 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) {
continue;
}
/* LY: check filesize & used_bytes */
const uint64_t used_bytes =
page.mp_meta.mm_geo.next * (uint64_t)page.mp_meta.mm_psize;
if (used_bytes > *filesize) {
mdbx_notice("meta[%u] used-bytes (%" PRIu64 ") beyond filesize (%" PRIu64
"), skip it",
meta_number, used_bytes, *filesize);
rc = MDBX_CORRUPTED;
continue;
}
/* LY: check mapsize limits */
const uint64_t mapsize_min =
page.mp_meta.mm_geo.lower * (uint64_t)page.mp_meta.mm_psize;
@ -4428,8 +4446,6 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) {
if (mapsize_max > MAX_MAPSIZE ||
MAX_PAGENO < mdbx_roundup2((size_t)mapsize_max, env->me_os_psize) /
(size_t)page.mp_meta.mm_psize) {
const uint64_t used_bytes =
page.mp_meta.mm_geo.next * (uint64_t)page.mp_meta.mm_psize;
if (page.mp_meta.mm_geo.next - 1 > MAX_PAGENO ||
used_bytes > MAX_MAPSIZE) {
mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), skip it",
@ -4592,7 +4608,30 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
const size_t usedbytes = pgno_align2os_bytes(env, pending->mm_geo.next);
if (env->me_sync_threshold && env->me_sync_pending >= env->me_sync_threshold)
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED;
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
/* LY: check conditions to shrink datafile */
const pgno_t backlog_gap =
pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env);
pgno_t shrink = 0;
if ((flags & MDBX_SHRINK_ALLOWED) && pending->mm_geo.shrink &&
pending->mm_geo.now - pending->mm_geo.next >
pending->mm_geo.shrink + backlog_gap) {
const pgno_t aligner =
pending->mm_geo.grow ? pending->mm_geo.grow : pending->mm_geo.shrink;
const pgno_t with_backlog_gap = pending->mm_geo.next + backlog_gap;
const pgno_t aligned = pgno_align2os_pgno(
env, with_backlog_gap + aligner - with_backlog_gap % aligner);
const pgno_t bottom =
(aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower;
if (pending->mm_geo.now > bottom) {
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
shrink = pending->mm_geo.now - bottom;
pending->mm_geo.now = bottom;
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a)
mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1);
}
}
/* LY: step#1 - sync previously written/updated data-pages */
int rc = MDBX_RESULT_TRUE;
@ -4619,28 +4658,6 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
}
}
/* LY: check conditions to shrink datafile */
const pgno_t backlog_gap =
pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env);
pgno_t shrink = 0;
if ((flags & MDBX_SHRINK_ALLOWED) && pending->mm_geo.shrink &&
pending->mm_geo.now - pending->mm_geo.next >
pending->mm_geo.shrink + backlog_gap) {
const pgno_t aligner =
pending->mm_geo.grow ? pending->mm_geo.grow : pending->mm_geo.shrink;
const pgno_t with_backlog_gap = pending->mm_geo.next + backlog_gap;
const pgno_t aligned = pgno_align2os_pgno(
env, with_backlog_gap + aligner - with_backlog_gap % aligner);
const pgno_t bottom =
(aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower;
if (pending->mm_geo.now > bottom) {
shrink = pending->mm_geo.now - bottom;
pending->mm_geo.now = bottom;
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a)
mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1);
}
}
/* Steady or Weak */
if (env->me_sync_pending == 0) {
pending->mm_datasync_sign = mdbx_meta_sign(pending);
@ -5238,9 +5255,10 @@ int __cold mdbx_env_get_maxreaders(MDBX_env *env, unsigned *readers) {
/* Further setup required for opening an MDBX environment */
static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
uint64_t filesize_before_mmap;
MDBX_meta meta;
int rc = MDBX_RESULT_FALSE;
int err = mdbx_read_header(env, &meta);
int err = mdbx_read_header(env, &meta, &filesize_before_mmap);
if (unlikely(err != MDBX_SUCCESS)) {
if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA ||
(env->me_flags & MDBX_RDONLY) != 0)
@ -5266,12 +5284,12 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
if (unlikely(err != MDBX_SUCCESS))
return err;
err = mdbx_ftruncate(env->me_fd, env->me_dbgeo.now);
err = mdbx_ftruncate(env->me_fd, filesize_before_mmap = env->me_dbgeo.now);
if (unlikely(err != MDBX_SUCCESS))
return err;
#ifndef NDEBUG /* just for checking */
err = mdbx_read_header(env, &meta);
err = mdbx_read_header(env, &meta, &filesize_before_mmap);
if (unlikely(err != MDBX_SUCCESS))
return err;
#endif
@ -5359,11 +5377,6 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
env->me_dbgeo.shrink = pgno2bytes(env, meta.mm_geo.shrink);
}
uint64_t filesize_before_mmap;
err = mdbx_filesize(env->me_fd, &filesize_before_mmap);
if (unlikely(err != MDBX_SUCCESS))
return err;
const size_t expected_bytes =
mdbx_roundup2(pgno2bytes(env, meta.mm_geo.now), env->me_os_psize);
mdbx_ensure(env, expected_bytes >= used_bytes);
@ -7320,7 +7333,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
static int mdbx_cursor_touch(MDBX_cursor *mc) {
int rc = MDBX_SUCCESS;
if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & (DB_DIRTY | DB_DUPDATA))) {
if (mc->mc_dbi >= CORE_DBS &&
(*mc->mc_dbflag & (DB_DIRTY | DB_DUPDATA)) == 0) {
/* Touch DB record of named DB */
MDBX_cursor mc2;
MDBX_xcursor mcx;