lmdb: refine sync/async writes of meta-pages.

Change-Id: If309023214e60f349762d337f98ee6fd515faa31
This commit is contained in:
Leo Yuriev 2015-04-18 17:17:45 +03:00
parent 347c1d6dfb
commit 38789b6c0a

52
mdb.c
View File

@ -2733,7 +2733,7 @@ mdb_env_sync0(MDB_env *env, int *force)
*force = 1; *force = 1;
if (*force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { if (*force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
if (env->me_flags & MDB_WRITEMAP) { if (env->me_flags & MDB_WRITEMAP) {
int mode = ((env->me_flags & MDB_MAPASYNC) && *force == 0) ? MS_ASYNC : MS_SYNC; int mode = (!*force && (env->me_flags & MDB_MAPASYNC)) ? MS_ASYNC : MS_SYNC;
/* LY: skip meta-pages, sync ones explicit later */ /* LY: skip meta-pages, sync ones explicit later */
size_t data_offset = (env->me_psize * 2 + env->me_os_psize - 1) & ~(env->me_os_psize - 1); size_t data_offset = (env->me_psize * 2 + env->me_os_psize - 1) & ~(env->me_os_psize - 1);
@ -3475,19 +3475,19 @@ again:
if (mdb_page_alloc(&mc, 0, NULL)) { if (mdb_page_alloc(&mc, 0, NULL)) {
rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY); rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY);
if (rc && rc != MDB_NOTFOUND) if (rc && rc != MDB_NOTFOUND)
goto ballout; goto bailout;
break; break;
} }
} }
rc = mdb_cursor_get(&mc, &key, NULL, MDB_SET); rc = mdb_cursor_get(&mc, &key, NULL, MDB_SET);
if (rc != MDB_NOTFOUND) { if (rc != MDB_NOTFOUND) {
if (rc) if (rc)
goto ballout; goto bailout;
mc.mc_flags |= C_RECLAIMING; mc.mc_flags |= C_RECLAIMING;
rc = mdb_cursor_del(&mc, 0); rc = mdb_cursor_del(&mc, 0);
mc.mc_flags &= ~C_RECLAIMING; mc.mc_flags &= ~C_RECLAIMING;
if (rc) if (rc)
goto ballout; goto bailout;
} }
} }
} }
@ -3498,7 +3498,7 @@ again:
/* Make sure last page of freeDB is touched and on freelist */ /* Make sure last page of freeDB is touched and on freelist */
rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY); rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY);
if (rc && rc != MDB_NOTFOUND) if (rc && rc != MDB_NOTFOUND)
goto ballout; goto bailout;
} }
free_pgs = txn->mt_free_pgs; free_pgs = txn->mt_free_pgs;
/* Write to last page of freeDB */ /* Write to last page of freeDB */
@ -3509,7 +3509,7 @@ again:
data.mv_size = MDB_IDL_SIZEOF(free_pgs); data.mv_size = MDB_IDL_SIZEOF(free_pgs);
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
if (rc) if (rc)
goto ballout; goto bailout;
/* Retry if mt_free_pgs[] grew during the Put() */ /* Retry if mt_free_pgs[] grew during the Put() */
free_pgs = txn->mt_free_pgs; free_pgs = txn->mt_free_pgs;
} while (freecnt < free_pgs[0]); } while (freecnt < free_pgs[0]);
@ -3556,26 +3556,26 @@ again:
continue; continue;
if (rc != MDB_NOTFOUND) if (rc != MDB_NOTFOUND)
/* LY: other troubles... */ /* LY: other troubles... */
goto ballout; goto bailout;
/* LY: freedb is empty, will look any free txn-id in high2low order. */ /* LY: freedb is empty, will look any free txn-id in high2low order. */
if (env->me_pglast < 1) { if (env->me_pglast < 1) {
/* LY: not any txn in the past of freedb. */ /* LY: not any txn in the past of freedb. */
rc = MDB_MAP_FULL; rc = MDB_MAP_FULL;
goto ballout; goto bailout;
} }
if (! txn->mt_lifo_reclaimed) { if (! txn->mt_lifo_reclaimed) {
txn->mt_lifo_reclaimed = mdb_midl_alloc(env->me_maxfree_1pg); txn->mt_lifo_reclaimed = mdb_midl_alloc(env->me_maxfree_1pg);
if (! txn->mt_lifo_reclaimed) { if (! txn->mt_lifo_reclaimed) {
rc = ENOMEM; rc = ENOMEM;
goto ballout; goto bailout;
} }
} }
/* LY: append the list. */ /* LY: append the list. */
rc = mdb_midl_append(&txn->mt_lifo_reclaimed, env->me_pglast - 1); rc = mdb_midl_append(&txn->mt_lifo_reclaimed, env->me_pglast - 1);
if (rc) if (rc)
goto ballout; goto bailout;
--env->me_pglast; --env->me_pglast;
/* LY: note that freeDB cleanup is not needed. */ /* LY: note that freeDB cleanup is not needed. */
++cleanup_idx; ++cleanup_idx;
@ -3600,7 +3600,7 @@ again:
data.mv_size = (head_room + 1) * sizeof(pgno_t); data.mv_size = (head_room + 1) * sizeof(pgno_t);
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
if (rc) if (rc)
goto ballout; goto bailout;
/* IDL is initially empty, zero out at least the length */ /* IDL is initially empty, zero out at least the length */
pgs = (pgno_t *)data.mv_data; pgs = (pgno_t *)data.mv_data;
j = head_room > clean_limit ? head_room : 0; j = head_room > clean_limit ? head_room : 0;
@ -3621,7 +3621,7 @@ again:
MDB_IDL loose; MDB_IDL loose;
/* Room for loose pages + temp IDL with same */ /* Room for loose pages + temp IDL with same */
if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0) if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0)
goto ballout; goto bailout;
mop = env->me_pghead; mop = env->me_pghead;
loose = mop + MDB_IDL_ALLOCLEN(mop) - count; loose = mop + MDB_IDL_ALLOCLEN(mop) - count;
for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp)) for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp))
@ -3643,7 +3643,7 @@ again:
if (! lifo) { if (! lifo) {
rc = mdb_cursor_first(&mc, &key, &data); rc = mdb_cursor_first(&mc, &key, &data);
if (rc) if (rc)
goto ballout; goto bailout;
} }
for(;;) { for(;;) {
@ -3661,7 +3661,7 @@ again:
key.mv_size = sizeof(id); key.mv_size = sizeof(id);
rc = mdb_cursor_get(&mc, &key, &data, MDB_SET); rc = mdb_cursor_get(&mc, &key, &data, MDB_SET);
if (rc) if (rc)
goto ballout; goto bailout;
} }
mdb_tassert(txn, cleanup_idx == (txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0)); mdb_tassert(txn, cleanup_idx == (txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
@ -3680,17 +3680,17 @@ again:
mdb_tassert(txn, cleanup_idx == (txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0)); mdb_tassert(txn, cleanup_idx == (txn->mt_lifo_reclaimed ? txn->mt_lifo_reclaimed[0] : 0));
mop[0] = save; mop[0] = save;
if (rc || (mop_len -= len) == 0) if (rc || (mop_len -= len) == 0)
goto ballout; goto bailout;
if (! lifo) { if (! lifo) {
rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT); rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT);
if (rc) if (rc)
goto ballout; goto bailout;
} }
} }
} }
ballout: bailout:
if (txn->mt_lifo_reclaimed) { if (txn->mt_lifo_reclaimed) {
mdb_tassert(txn, rc || cleanup_idx == txn->mt_lifo_reclaimed[0]); mdb_tassert(txn, rc || cleanup_idx == txn->mt_lifo_reclaimed[0]);
if (rc == 0 && cleanup_idx != txn->mt_lifo_reclaimed[0]) { if (rc == 0 && cleanup_idx != txn->mt_lifo_reclaimed[0]) {
@ -4241,7 +4241,7 @@ mdb_env_write_meta(MDB_txn *txn, int force)
{ {
MDB_env *env; MDB_env *env;
MDB_meta meta, metab, *mp; MDB_meta meta, metab, *mp;
unsigned flags; int syncflush;
size_t mapsize; size_t mapsize;
off_t off; off_t off;
int rc, len, toggle; int rc, len, toggle;
@ -4258,14 +4258,15 @@ mdb_env_write_meta(MDB_txn *txn, int force)
toggle, txn->mt_dbs[MAIN_DBI].md_root)); toggle, txn->mt_dbs[MAIN_DBI].md_root));
env = txn->mt_env; env = txn->mt_env;
flags = txn->mt_flags & env->me_flags; syncflush = force ||
! ((txn->mt_flags | env->me_flags) & (MDB_NOMETASYNC | MDB_NOSYNC));
mp = env->me_metas[toggle]; mp = env->me_metas[toggle];
mapsize = env->me_metas[toggle ^ 1]->mm_mapsize; mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
/* Persist any increases of mapsize config */ /* Persist any increases of mapsize config */
if (mapsize < env->me_mapsize) if (mapsize < env->me_mapsize)
mapsize = env->me_mapsize; mapsize = env->me_mapsize;
if (flags & MDB_WRITEMAP) { if (env->me_flags & MDB_WRITEMAP) {
mp->mm_mapsize = mapsize; mp->mm_mapsize = mapsize;
mp->mm_dbs[0] = txn->mt_dbs[0]; mp->mm_dbs[0] = txn->mt_dbs[0];
mp->mm_dbs[1] = txn->mt_dbs[1]; mp->mm_dbs[1] = txn->mt_dbs[1];
@ -4273,9 +4274,9 @@ mdb_env_write_meta(MDB_txn *txn, int force)
/* (LY) ITS#7969: issue a memory barrier, it is noop for x86. */ /* (LY) ITS#7969: issue a memory barrier, it is noop for x86. */
mdb_coherent_barrier(); mdb_coherent_barrier();
mp->mm_txnid = txn->mt_txnid; mp->mm_txnid = txn->mt_txnid;
if (force || !(flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { if ( syncflush ) {
unsigned meta_size = env->me_psize; unsigned meta_size = env->me_psize;
int flags = (!force && (env->me_flags & MDB_MAPASYNC)) ? MS_ASYNC : MS_SYNC; int mode = (!force && (env->me_flags & MDB_MAPASYNC)) ? MS_ASYNC : MS_SYNC;
ptr = env->me_map; ptr = env->me_map;
if (toggle) { if (toggle) {
#ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */ #ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */
@ -4285,12 +4286,12 @@ mdb_env_write_meta(MDB_txn *txn, int force)
#endif #endif
ptr += meta_size; ptr += meta_size;
} }
if (MDB_MSYNC(ptr, meta_size, flags)) { if (MDB_MSYNC(ptr, meta_size, mode)) {
rc = ErrCode(); rc = ErrCode();
goto fail; goto fail;
} }
#ifdef _WIN32 #ifdef _WIN32
else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) { else if (mode == MS_SYNC && MDB_FDATASYNC(env->me_fd)) {
rc = ErrCode(); rc = ErrCode();
goto fail; goto fail;
} }
@ -4315,8 +4316,7 @@ mdb_env_write_meta(MDB_txn *txn, int force)
off += PAGEHDRSZ; off += PAGEHDRSZ;
/* Write to the SYNC fd */ /* Write to the SYNC fd */
mfd = (force || !(flags & (MDB_NOSYNC|MDB_NOMETASYNC))) ? mfd = syncflush ? env->me_mfd : env->me_fd;
env->me_mfd : env->me_fd;
retry_write: retry_write:
#ifdef _WIN32 #ifdef _WIN32
{ {