From 38369bd24bbc0a4cfedc4f3a63b028f8b9af7709 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Fri, 1 Jun 2018 13:15:10 +0300 Subject: [PATCH 1/2] mdbx: force steady-sync when shrinking DB (fix corruption bug). Change-Id: I636ee2ad04f28d4038d74bdf2c7061ab885f4b82 --- src/mdbx.c | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/mdbx.c b/src/mdbx.c index 0b753972..1ade6e59 100644 --- a/src/mdbx.c +++ b/src/mdbx.c @@ -4592,7 +4592,30 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, const size_t usedbytes = pgno_align2os_bytes(env, pending->mm_geo.next); if (env->me_sync_threshold && env->me_sync_pending >= env->me_sync_threshold) - flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; + flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ + + /* LY: check conditions to shrink datafile */ + const pgno_t backlog_gap = + pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env); + pgno_t shrink = 0; + if ((flags & MDBX_SHRINK_ALLOWED) && pending->mm_geo.shrink && + pending->mm_geo.now - pending->mm_geo.next > + pending->mm_geo.shrink + backlog_gap) { + const pgno_t aligner = + pending->mm_geo.grow ? pending->mm_geo.grow : pending->mm_geo.shrink; + const pgno_t with_backlog_gap = pending->mm_geo.next + backlog_gap; + const pgno_t aligned = pgno_align2os_pgno( + env, with_backlog_gap + aligner - with_backlog_gap % aligner); + const pgno_t bottom = + (aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower; + if (pending->mm_geo.now > bottom) { + flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */ + shrink = pending->mm_geo.now - bottom; + pending->mm_geo.now = bottom; + if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a) + mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1); + } + } /* LY: step#1 - sync previously written/updated data-pages */ int rc = MDBX_RESULT_TRUE; @@ -4619,28 +4642,6 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, } } - /* LY: check conditions to shrink datafile */ - const pgno_t backlog_gap = - pending->mm_dbs[FREE_DBI].md_depth + mdbx_backlog_extragap(env); - pgno_t shrink = 0; - if ((flags & MDBX_SHRINK_ALLOWED) && pending->mm_geo.shrink && - pending->mm_geo.now - pending->mm_geo.next > - pending->mm_geo.shrink + backlog_gap) { - const pgno_t aligner = - pending->mm_geo.grow ? pending->mm_geo.grow : pending->mm_geo.shrink; - const pgno_t with_backlog_gap = pending->mm_geo.next + backlog_gap; - const pgno_t aligned = pgno_align2os_pgno( - env, with_backlog_gap + aligner - with_backlog_gap % aligner); - const pgno_t bottom = - (aligned > pending->mm_geo.lower) ? aligned : pending->mm_geo.lower; - if (pending->mm_geo.now > bottom) { - shrink = pending->mm_geo.now - bottom; - pending->mm_geo.now = bottom; - if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a) - mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1); - } - } - /* Steady or Weak */ if (env->me_sync_pending == 0) { pending->mm_datasync_sign = mdbx_meta_sign(pending); @@ -7320,7 +7321,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, static int mdbx_cursor_touch(MDBX_cursor *mc) { int rc = MDBX_SUCCESS; - if (mc->mc_dbi >= CORE_DBS && !(*mc->mc_dbflag & (DB_DIRTY | DB_DUPDATA))) { + if (mc->mc_dbi >= CORE_DBS && + (*mc->mc_dbflag & (DB_DIRTY | DB_DUPDATA)) == 0) { /* Touch DB record of named DB */ MDBX_cursor mc2; MDBX_xcursor mcx; From fdc248384e5f2c7ce515107765da8dc77a1e5fb7 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Fri, 1 Jun 2018 13:27:20 +0300 Subject: [PATCH 2/2] mdbx: skip meta if usedbytes beyond oef. Change-Id: I1e95136bce7169b7ed612f9746d0cdec43caca14 --- src/mdbx.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/mdbx.c b/src/mdbx.c index 1ade6e59..2ac91e9e 100644 --- a/src/mdbx.c +++ b/src/mdbx.c @@ -982,7 +982,8 @@ static int __must_check_result mdbx_page_split(MDBX_cursor *mc, MDBX_val *newdata, pgno_t newpgno, unsigned nflags); -static int __must_check_result mdbx_read_header(MDBX_env *env, MDBX_meta *meta); +static int __must_check_result mdbx_read_header(MDBX_env *env, MDBX_meta *meta, + uint64_t *filesize); static int __must_check_result mdbx_sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending); static void mdbx_env_close0(MDBX_env *env); @@ -4270,11 +4271,17 @@ fail: /* Read the environment parameters of a DB environment * before mapping it into memory. */ -static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) { +static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, + uint64_t *filesize) { assert(offsetof(MDBX_page, mp_meta) == PAGEHDRSZ); + + int rc = mdbx_filesize(env->me_fd, filesize); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + memset(meta, 0, sizeof(MDBX_meta)); meta->mm_datasync_sign = MDBX_DATASIGN_WEAK; - int rc = MDBX_CORRUPTED; + rc = MDBX_CORRUPTED; /* Read twice all meta pages so we can find the latest one. */ unsigned loop_limit = NUM_METAS * 2; @@ -4410,6 +4417,17 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) { continue; } + /* LY: check filesize & used_bytes */ + const uint64_t used_bytes = + page.mp_meta.mm_geo.next * (uint64_t)page.mp_meta.mm_psize; + if (used_bytes > *filesize) { + mdbx_notice("meta[%u] used-bytes (%" PRIu64 ") beyond filesize (%" PRIu64 + "), skip it", + meta_number, used_bytes, *filesize); + rc = MDBX_CORRUPTED; + continue; + } + /* LY: check mapsize limits */ const uint64_t mapsize_min = page.mp_meta.mm_geo.lower * (uint64_t)page.mp_meta.mm_psize; @@ -4428,8 +4446,6 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta) { if (mapsize_max > MAX_MAPSIZE || MAX_PAGENO < mdbx_roundup2((size_t)mapsize_max, env->me_os_psize) / (size_t)page.mp_meta.mm_psize) { - const uint64_t used_bytes = - page.mp_meta.mm_geo.next * (uint64_t)page.mp_meta.mm_psize; if (page.mp_meta.mm_geo.next - 1 > MAX_PAGENO || used_bytes > MAX_MAPSIZE) { mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), skip it", @@ -5239,9 +5255,10 @@ int __cold mdbx_env_get_maxreaders(MDBX_env *env, unsigned *readers) { /* Further setup required for opening an MDBX environment */ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) { + uint64_t filesize_before_mmap; MDBX_meta meta; int rc = MDBX_RESULT_FALSE; - int err = mdbx_read_header(env, &meta); + int err = mdbx_read_header(env, &meta, &filesize_before_mmap); if (unlikely(err != MDBX_SUCCESS)) { if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA || (env->me_flags & MDBX_RDONLY) != 0) @@ -5267,12 +5284,12 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) { if (unlikely(err != MDBX_SUCCESS)) return err; - err = mdbx_ftruncate(env->me_fd, env->me_dbgeo.now); + err = mdbx_ftruncate(env->me_fd, filesize_before_mmap = env->me_dbgeo.now); if (unlikely(err != MDBX_SUCCESS)) return err; #ifndef NDEBUG /* just for checking */ - err = mdbx_read_header(env, &meta); + err = mdbx_read_header(env, &meta, &filesize_before_mmap); if (unlikely(err != MDBX_SUCCESS)) return err; #endif @@ -5360,11 +5377,6 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) { env->me_dbgeo.shrink = pgno2bytes(env, meta.mm_geo.shrink); } - uint64_t filesize_before_mmap; - err = mdbx_filesize(env->me_fd, &filesize_before_mmap); - if (unlikely(err != MDBX_SUCCESS)) - return err; - const size_t expected_bytes = mdbx_roundup2(pgno2bytes(env, meta.mm_geo.now), env->me_os_psize); mdbx_ensure(env, expected_bytes >= used_bytes);