mdbx: fix/rework shrinking, add MDBX_SHRINK_ALLOWED.

Change-Id: I014440850aa4be927843aa2a6a268794a4da9b2a
This commit is contained in:
Leo Yuriev 2017-07-21 10:30:46 +03:00
parent 599711a007
commit 242baf761f
2 changed files with 54 additions and 32 deletions

View File

@ -669,6 +669,8 @@ struct MDBX_env {
/* Failed to update the meta page. Probably an I/O error. */ /* Failed to update the meta page. Probably an I/O error. */
#define MDBX_FATAL_ERROR UINT32_C(0x80000000) #define MDBX_FATAL_ERROR UINT32_C(0x80000000)
/* Additional flag for mdbx_sync_locked() */
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
/* Some fields are initialized. */ /* Some fields are initialized. */
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000) #define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
/* me_txkey is set */ /* me_txkey is set */

View File

@ -1561,8 +1561,10 @@ static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno,
mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes); mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
if (rc == MDBX_SUCCESS) { if (rc == MDBX_SUCCESS) {
if (env->me_txn0) if (env->me_txn) {
env->me_txn0->mt_end_pgno = size_pgno; mdbx_tassert(env->me_txn, size_pgno >= env->me_txn->mt_next_pgno);
env->me_txn->mt_end_pgno = size_pgno;
}
env->me_dbgeo.now = size_bytes; env->me_dbgeo.now = size_bytes;
env->me_dbgeo.upper = limit_bytes; env->me_dbgeo.upper = limit_bytes;
} else { } else {
@ -1889,14 +1891,20 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
env, env,
mdbx_roundup2(pgno2bytes(env, txn->mt_next_pgno + head->mm_geo.grow), mdbx_roundup2(pgno2bytes(env, txn->mt_next_pgno + head->mm_geo.grow),
env->me_os_psize)); env->me_os_psize));
while (next >= growth_pgno)
growth_pgno = bytes2pgno(
env, mdbx_roundup2(pgno2bytes(env, growth_pgno + head->mm_geo.grow),
env->me_os_psize));
if (growth_pgno > head->mm_geo.upper) if (growth_pgno > head->mm_geo.upper)
growth_pgno = head->mm_geo.upper; growth_pgno = head->mm_geo.upper;
mdbx_info("try growth datafile to %" PRIaPGNO " pages (+%" PRIaPGNO ")", mdbx_info("try growth datafile to %" PRIaPGNO " pages (+%" PRIaPGNO ")",
growth_pgno, growth_pgno - txn->mt_end_pgno); growth_pgno, growth_pgno - txn->mt_end_pgno);
rc = mdbx_mapresize(env, growth_pgno, head->mm_geo.upper); rc = mdbx_mapresize(env, growth_pgno, head->mm_geo.upper);
if (rc == MDBX_SUCCESS) if (rc == MDBX_SUCCESS) {
mdbx_tassert(env->me_txn, txn->mt_end_pgno >= next);
continue; continue;
}
mdbx_warning("unable growth datafile to %" PRIaPGNO "pages (+%" PRIaPGNO mdbx_warning("unable growth datafile to %" PRIaPGNO "pages (+%" PRIaPGNO
"), errcode %d", "), errcode %d",
@ -2195,7 +2203,7 @@ int mdbx_env_sync(MDBX_env *env, int force) {
container_of(head, MDBX_page, mp_data)->mp_pgno, container_of(head, MDBX_page, mp_data)->mp_pgno,
mdbx_durable_str(head), env->me_sync_pending); mdbx_durable_str(head), env->me_sync_pending);
MDBX_meta meta = *head; MDBX_meta meta = *head;
int rc = mdbx_sync_locked(env, flags, &meta); int rc = mdbx_sync_locked(env, flags | MDBX_SHRINK_ALLOWED, &meta);
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS)) {
if (outside_txn) if (outside_txn)
mdbx_txn_unlock(env); mdbx_txn_unlock(env);
@ -2509,9 +2517,11 @@ int mdbx_txn_renew(MDBX_txn *txn) {
rc = mdbx_txn_renew0(txn, MDBX_TXN_RDONLY); rc = mdbx_txn_renew0(txn, MDBX_TXN_RDONLY);
if (rc == MDBX_SUCCESS) { if (rc == MDBX_SUCCESS) {
mdbx_debug("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "", mdbx_debug("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO
"/%" PRIaPGNO,
txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w',
(void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root,
txn->mt_dbs[FREE_DBI].md_root);
} }
return rc; return rc;
} }
@ -2632,9 +2642,11 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
txn->mt_owner = mdbx_thread_self(); txn->mt_owner = mdbx_thread_self();
txn->mt_signature = MDBX_MT_SIGNATURE; txn->mt_signature = MDBX_MT_SIGNATURE;
*ret = txn; *ret = txn;
mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "", mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO
"/%" PRIaPGNO,
txn->mt_txnid, (flags & MDBX_RDONLY) ? 'r' : 'w', (void *)txn, txn->mt_txnid, (flags & MDBX_RDONLY) ? 'r' : 'w', (void *)txn,
(void *)env, txn->mt_dbs[MAIN_DBI].md_root); (void *)env, txn->mt_dbs[MAIN_DBI].md_root,
txn->mt_dbs[FREE_DBI].md_root);
} }
return rc; return rc;
@ -2696,10 +2708,12 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
/* Export or close DBI handles opened in this txn */ /* Export or close DBI handles opened in this txn */
mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); mdbx_dbis_update(txn, mode & MDBX_END_UPDATE);
mdbx_debug("%s txn %" PRIaTXN "%c %p on mdbenv %p, root page %" PRIaPGNO "", mdbx_debug("%s txn %" PRIaTXN "%c %p on mdbenv %p, root page %" PRIaPGNO
"/%" PRIaPGNO,
names[mode & MDBX_END_OPMASK], txn->mt_txnid, names[mode & MDBX_END_OPMASK], txn->mt_txnid,
(txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn,
(void *)env, txn->mt_dbs[MAIN_DBI].md_root); (void *)env, txn->mt_dbs[MAIN_DBI].md_root,
txn->mt_dbs[FREE_DBI].md_root);
if (F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY)) { if (F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY)) {
if (txn->mt_ro_reader) { if (txn->mt_ro_reader) {
@ -3456,9 +3470,10 @@ int mdbx_txn_commit(MDBX_txn *txn) {
!(txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS))) !(txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)))
goto done; goto done;
mdbx_debug( mdbx_debug("committing txn %" PRIaTXN " %p on mdbenv %p, root page %" PRIaPGNO
"committing txn %" PRIaTXN " %p on mdbenv %p, root page %" PRIaPGNO "", "/%" PRIaPGNO,
txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); txn->mt_txnid, (void *)txn, (void *)env,
txn->mt_dbs[MAIN_DBI].md_root, txn->mt_dbs[FREE_DBI].md_root);
/* Update DB root pointers */ /* Update DB root pointers */
if (txn->mt_numdbs > CORE_DBS) { if (txn->mt_numdbs > CORE_DBS) {
@ -3505,7 +3520,8 @@ int mdbx_txn_commit(MDBX_txn *txn) {
meta.mm_canary = txn->mt_canary; meta.mm_canary = txn->mt_canary;
mdbx_meta_set_txnid(env, &meta, txn->mt_txnid); mdbx_meta_set_txnid(env, &meta, txn->mt_txnid);
rc = mdbx_sync_locked(env, env->me_flags | txn->mt_flags, &meta); rc = mdbx_sync_locked(
env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, &meta);
} }
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
@ -3814,11 +3830,12 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS)); pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS));
mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0); mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0);
mdbx_assert(env, !META_IS_STEADY(head) || env->me_sync_pending != 0); mdbx_assert(env, !META_IS_STEADY(head) || env->me_sync_pending != 0);
mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now);
const size_t usedbytes = const size_t usedbytes =
mdbx_roundup2(pgno2bytes(env, pending->mm_geo.next), env->me_os_psize); mdbx_roundup2(pgno2bytes(env, pending->mm_geo.next), env->me_os_psize);
if (env->me_sync_threshold && env->me_sync_pending >= env->me_sync_threshold) if (env->me_sync_threshold && env->me_sync_pending >= env->me_sync_threshold)
flags &= MDBX_WRITEMAP; flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED;
/* LY: step#1 - sync previously written/updated data-pages */ /* LY: step#1 - sync previously written/updated data-pages */
int rc = MDBX_RESULT_TRUE; int rc = MDBX_RESULT_TRUE;
@ -3850,11 +3867,12 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
#else #else
/* LY: check conditions to shrink datafile */ /* LY: check conditions to shrink datafile */
pgno_t shrink_pgno_delta = 0; pgno_t shrink_pgno_delta = 0;
const pgno_t shrink_pgno = pending->mm_geo.next /* + pending->mm_geo.grow */; if ((flags & MDBX_SHRINK_ALLOWED) &&
if (pending->mm_geo.now > shrink_pgno && pending->mm_geo.shrink && pending->mm_geo.next < head->mm_geo.next) {
unlikely(pending->mm_geo.now - pending->mm_geo.shrink >= shrink_pgno)) { const pgno_t shrink_pgno =
if (pending->mm_geo.now > shrink_pgno && pending->mm_geo.next /* + pending->mm_geo.grow */;
pending->mm_geo.now - pending->mm_geo.shrink >= shrink_pgno) { if (pending->mm_geo.now > shrink_pgno && pending->mm_geo.shrink &&
unlikely(pending->mm_geo.now - pending->mm_geo.shrink >= shrink_pgno)) {
shrink_pgno_delta = pending->mm_geo.now - shrink_pgno; shrink_pgno_delta = pending->mm_geo.now - shrink_pgno;
pending->mm_geo.now = shrink_pgno; pending->mm_geo.now = shrink_pgno;
} }
@ -3896,15 +3914,15 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
} }
/* LY: step#2 - update meta-page. */ /* LY: step#2 - update meta-page. */
mdbx_debug( mdbx_debug("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO
"writing meta%" PRIaPGNO " (%s, was %" PRIaTXN ", %s), root %" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
"/%" PRIaPGNO ", " " +%u -%u, txn_id %" PRIaTXN ", %s",
"txn_id %" PRIaTXN ", %s", container_of(target, MDBX_page, mp_data)->mp_pgno,
container_of(target, MDBX_page, mp_data)->mp_pgno, pending->mm_dbs[MAIN_DBI].md_root,
(target == head) ? "head" : "tail", mdbx_meta_txnid_stable(env, target), pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower,
mdbx_durable_str((const MDBX_meta *)target), pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper,
pending->mm_dbs[MAIN_DBI].md_root, pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.grow, pending->mm_geo.shrink, pending->mm_txnid_a,
pending->mm_txnid_a, mdbx_durable_str(pending)); mdbx_durable_str(pending));
mdbx_debug("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO mdbx_debug("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO
"/%" PRIaPGNO, "/%" PRIaPGNO,
@ -4680,7 +4698,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
mdbx_ensure(env, mdbx_meta_eq(env, &meta, head)); mdbx_ensure(env, mdbx_meta_eq(env, &meta, head));
mdbx_meta_set_txnid(env, &meta, txnid + 1); mdbx_meta_set_txnid(env, &meta, txnid + 1);
env->me_sync_pending += env->me_psize; env->me_sync_pending += env->me_psize;
err = mdbx_sync_locked(env, env->me_flags, &meta); err = mdbx_sync_locked(env, env->me_flags | MDBX_SHRINK_ALLOWED, &meta);
if (err) { if (err) {
mdbx_info("error %d, while updating meta.geo: " mdbx_info("error %d, while updating meta.geo: "
"from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO
@ -8374,8 +8392,10 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
} }
} }
} }
} else } else {
mdbx_debug("root page doesn't need rebalancing"); mdbx_debug("root page %" PRIaPGNO " doesn't need rebalancing",
mp->mp_pgno);
}
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }