mdbx: Merge branch 'devel' into master.

This commit is contained in:
Leo Yuriev 2015-11-23 10:19:33 +03:00
commit 6e1ba1cf28
5 changed files with 254 additions and 137 deletions

View File

@ -16,6 +16,14 @@ LMDB 0.9.17 Release Engineering
Fix ITS#7789 ensure mapsize >= pages in use Fix ITS#7789 ensure mapsize >= pages in use
Fix ITS#7971 mdb_txn_renew0() new reader slots Fix ITS#7971 mdb_txn_renew0() new reader slots
Fix ITS#7969 use __sync_synchronize on non-x86 Fix ITS#7969 use __sync_synchronize on non-x86
Fix ITS#8311 page_split from update_key
Fix ITS#8312 loose pages in nested txn
Fix ITS#8313 mdb_rebalance dummy cursor
Fix ITS#8315 dirty_room in nested txn
Fix ITS#8316 page_merge cursor tracking
Fix ITS#8319 mdb_load error messages
Fix ITS#8320 mdb_load plaintext input
Fix ITS#8321 cursor tracking
Added mdb_txn_id() (ITS#7994) Added mdb_txn_id() (ITS#7994)
Added robust mutex support Added robust mutex support
Miscellaneous cleanup/simplification Miscellaneous cleanup/simplification

336
mdb.c
View File

@ -889,7 +889,6 @@ struct MDB_cursor {
#define C_EOF 0x02 /**< No more data */ #define C_EOF 0x02 /**< No more data */
#define C_SUB 0x04 /**< Cursor is a sub-cursor */ #define C_SUB 0x04 /**< Cursor is a sub-cursor */
#define C_DEL 0x08 /**< last op was a cursor_del */ #define C_DEL 0x08 /**< last op was a cursor_del */
#define C_SPLITTING 0x20 /**< Cursor is in page_split */
#define C_UNTRACK 0x40 /**< Un-track cursor when closing */ #define C_UNTRACK 0x40 /**< Un-track cursor when closing */
#define C_RECLAIMING 0x80 /**< FreeDB lookup is prohibited */ #define C_RECLAIMING 0x80 /**< FreeDB lookup is prohibited */
/** @} */ /** @} */
@ -1033,7 +1032,7 @@ enum {
#define MDB_END_UPDATE 0x10 /**< update env state (DBIs) */ #define MDB_END_UPDATE 0x10 /**< update env state (DBIs) */
#define MDB_END_FREE 0x20 /**< free txn unless it is #MDB_env.%me_txn0 */ #define MDB_END_FREE 0x20 /**< free txn unless it is #MDB_env.%me_txn0 */
#define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */ #define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */
static void mdb_txn_end(MDB_txn *txn, unsigned mode); static int mdb_txn_end(MDB_txn *txn, unsigned mode);
static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl);
static int mdb_page_search_root(MDB_cursor *mc, static int mdb_page_search_root(MDB_cursor *mc,
@ -1059,7 +1058,7 @@ static int mdb_node_add(MDB_cursor *mc, indx_t indx,
MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags); MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags);
static void mdb_node_del(MDB_cursor *mc, int ksize); static void mdb_node_del(MDB_cursor *mc, int ksize);
static void mdb_node_shrink(MDB_page *mp, indx_t indx); static void mdb_node_shrink(MDB_page *mp, indx_t indx);
static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst); static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft);
static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data);
static size_t mdb_branch_size(MDB_env *env, MDB_val *key); static size_t mdb_branch_size(MDB_env *env, MDB_val *key);
@ -1401,7 +1400,7 @@ mdb_cursor_chk(MDB_cursor *mc)
MDB_node *node; MDB_node *node;
MDB_page *mp; MDB_page *mp;
if (!mc->mc_snum && !(mc->mc_flags & C_INITIALIZED)) return; if (!mc->mc_snum || !(mc->mc_flags & C_INITIALIZED)) return;
for (i=0; i<mc->mc_top; i++) { for (i=0; i<mc->mc_top; i++) {
mp = mc->mc_pg[i]; mp = mc->mc_pg[i];
node = NODEPTR(mp, mc->mc_ki[i]); node = NODEPTR(mp, mc->mc_ki[i]);
@ -1521,6 +1520,8 @@ mdb_page_malloc(MDB_txn *txn, unsigned num)
} }
#endif #endif
VALGRIND_MAKE_MEM_UNDEFINED(np, size); VALGRIND_MAKE_MEM_UNDEFINED(np, size);
np->mp_flags = 0;
np->mp_pages = num;
return np; return np;
} }
@ -2317,6 +2318,8 @@ done:
np->mp_pgno = pgno; np->mp_pgno = pgno;
np->mp_ksize = 0; np->mp_ksize = 0;
np->mp_flags = 0;
np->mp_pages = num;
mdb_page_dirty(txn, np); mdb_page_dirty(txn, np);
*mp = np; *mp = np;
@ -2600,14 +2603,15 @@ mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst)
*bk = *mc; *bk = *mc;
mc->mc_backup = bk; mc->mc_backup = bk;
mc->mc_db = &dst->mt_dbs[i]; mc->mc_db = &dst->mt_dbs[i];
/* Kill pointers into src - and dst to reduce abuse: The /* Kill pointers into src to reduce abuse: The
* user may not use mc until dst ends. Otherwise we'd... * user may not use mc until dst ends. But we need a valid
* txn pointer here for cursor fixups to keep working.
*/ */
mc->mc_txn = NULL; /* ...set this to dst */ mc->mc_txn = dst;
mc->mc_dbflag = NULL; /* ...and &dst->mt_dbflags[i] */ mc->mc_dbflag = &dst->mt_dbflags[i];
if ((mx = mc->mc_xcursor) != NULL) { if ((mx = mc->mc_xcursor) != NULL) {
*(MDB_xcursor *)(bk+1) = *mx; *(MDB_xcursor *)(bk+1) = *mx;
mx->mx_cursor.mc_txn = NULL; /* ...and dst. */ mx->mx_cursor.mc_txn = dst;
} }
mc->mc_next = dst->mt_cursors[i]; mc->mc_next = dst->mt_cursors[i];
dst->mt_cursors[i] = mc; dst->mt_cursors[i] = mc;
@ -2696,8 +2700,13 @@ mdb_txn_renew0(MDB_txn *txn)
uint16_t x; uint16_t x;
int rc, new_notls = 0; int rc, new_notls = 0;
if (unlikely(env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
if ((flags &= MDB_TXN_RDONLY) != 0) { if ((flags &= MDB_TXN_RDONLY) != 0) {
struct MDB_rthc* rthc = NULL; struct MDB_rthc *rthc = NULL;
MDB_reader *r = NULL; MDB_reader *r = NULL;
if (likely(env->me_flags & MDB_ENV_TXKEY)) { if (likely(env->me_flags & MDB_ENV_TXKEY)) {
mdb_assert(env, !(env->me_flags & MDB_NOTLS)); mdb_assert(env, !(env->me_flags & MDB_NOTLS));
@ -2885,6 +2894,11 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned flags, MDB_txn **ret)
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE)) if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDB_VERSION_MISMATCH; return MDB_VERSION_MISMATCH;
if (unlikely(env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
flags &= MDB_TXN_BEGIN_FLAGS; flags &= MDB_TXN_BEGIN_FLAGS;
flags |= env->me_flags & MDB_WRITEMAP; flags |= env->me_flags & MDB_WRITEMAP;
@ -3060,12 +3074,17 @@ mdbx_txn_straggler(MDB_txn *txn, int *percent)
* @param[in] txn the transaction handle to end * @param[in] txn the transaction handle to end
* @param[in] mode why and how to end the transaction * @param[in] mode why and how to end the transaction
*/ */
static void static int
mdb_txn_end(MDB_txn *txn, unsigned mode) mdb_txn_end(MDB_txn *txn, unsigned mode)
{ {
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
static const char *const names[] = MDB_END_NAMES; static const char *const names[] = MDB_END_NAMES;
if (unlikely(txn->mt_env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
/* Export or close DBI handles opened in this txn */ /* Export or close DBI handles opened in this txn */
mdb_dbis_update(txn, mode & MDB_END_UPDATE); mdb_dbis_update(txn, mode & MDB_END_UPDATE);
@ -3135,6 +3154,8 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
txn->mt_signature = 0; txn->mt_signature = 0;
free(txn); free(txn);
} }
return MDB_SUCCESS;
} }
int int
@ -3150,8 +3171,7 @@ mdb_txn_reset(MDB_txn *txn)
if (unlikely(!(txn->mt_flags & MDB_TXN_RDONLY))) if (unlikely(!(txn->mt_flags & MDB_TXN_RDONLY)))
return EINVAL; return EINVAL;
mdb_txn_end(txn, MDB_END_RESET); return mdb_txn_end(txn, MDB_END_RESET);
return MDB_SUCCESS;
} }
int int
@ -3166,8 +3186,7 @@ mdb_txn_abort(MDB_txn *txn)
if (txn->mt_child) if (txn->mt_child)
mdb_txn_abort(txn->mt_child); mdb_txn_abort(txn->mt_child);
mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE); return mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE);
return MDB_SUCCESS;
} }
static int static int
@ -3645,6 +3664,11 @@ mdb_txn_commit(MDB_txn *txn)
if(unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) if(unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
return MDB_VERSION_MISMATCH; return MDB_VERSION_MISMATCH;
if (unlikely(txn->mt_env->me_pid != getpid())) {
txn->mt_env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
/* mdb_txn_end() mode for a commit which writes nothing */ /* mdb_txn_end() mode for a commit which writes nothing */
end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE; end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE;
@ -3782,7 +3806,7 @@ mdb_txn_commit(MDB_txn *txn)
} }
/* Append our loose page list to parent's */ /* Append our loose page list to parent's */
for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp)) for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(*lp))
; ;
*lp = txn->mt_loose_pgs; *lp = txn->mt_loose_pgs;
parent->mt_loose_count += txn->mt_loose_count; parent->mt_loose_count += txn->mt_loose_count;
@ -3860,8 +3884,7 @@ mdb_txn_commit(MDB_txn *txn)
end_mode = MDB_END_COMMITTED|MDB_END_UPDATE; end_mode = MDB_END_COMMITTED|MDB_END_UPDATE;
done: done:
mdb_txn_end(txn, end_mode); return mdb_txn_end(txn, end_mode);
return MDB_SUCCESS;
fail: fail:
mdb_txn_abort(txn); mdb_txn_abort(txn);
@ -4050,7 +4073,8 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
if (unlikely(prev_mapsize != pending->mm_mapsize)) { if (unlikely(prev_mapsize != pending->mm_mapsize)) {
/* LY: It is no reason to use fdatasync() here, even in case /* LY: It is no reason to use fdatasync() here, even in case
* no such bug in a kernel. Because "no-bug" mean that a kernel * no such bug in a kernel. Because "no-bug" mean that a kernel
* internally do nearly the same. * internally do nearly the same, e.g. fdatasync() == fsync()
* when no-kernel-bug and file size was changed.
* *
* So, this code is always safe and without appreciable * So, this code is always safe and without appreciable
* performance degradation. * performance degradation.
@ -4200,14 +4224,16 @@ mdb_env_map(MDB_env *env, void *addr)
return errno; return errno;
} }
if (flags & MDB_NORDAHEAD) { unsigned madvise_flags = MADV_DONTFORK;
if (flags & MDB_NORDAHEAD)
/* Turn off readahead. It's harmful when the DB is larger than RAM. */ /* Turn off readahead. It's harmful when the DB is larger than RAM. */
#ifdef MADV_RANDOM madvise_flags |= MADV_RANDOM;
madvise(env->me_map, env->me_mapsize, MADV_RANDOM); if (madvise(env->me_map, env->me_mapsize, madvise_flags))
#elif defined(POSIX_MADV_RANDOM) return errno;
posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM);
#endif /* MADV_RANDOM & POSIX_MADV_RANDOM */ #ifdef MADV_DONTDUMP
} madvise(env->me_map, env->me_mapsize, MADV_DONTDUMP);
#endif
/* Can happen because the address argument to mmap() is just a /* Can happen because the address argument to mmap() is just a
* hint. mmap() can pick another, e.g. if the range is in use. * hint. mmap() can pick another, e.g. if the range is in use.
@ -4422,17 +4448,18 @@ static pthread_mutex_t mdb_rthc_lock = PTHREAD_MUTEX_INITIALIZER;
/* LY: TODO: Yet another problem is here - segfault in case if a DSO will /* LY: TODO: Yet another problem is here - segfault in case if a DSO will
* be unloaded before a thread would been finished. */ * be unloaded before a thread would been finished. */
static void static void
mdb_env_reader_dest(void *ptr) mdb_env_reader_destr(void *ptr)
{ {
struct MDB_rthc* rthc = ptr; struct MDB_rthc* rthc = ptr;
MDB_reader *reader; MDB_reader *reader;
if (! rthc)
/* LY: paranoia */
return;
mdb_ensure(NULL, pthread_mutex_lock(&mdb_rthc_lock) == 0); mdb_ensure(NULL, pthread_mutex_lock(&mdb_rthc_lock) == 0);
/* LY: Here may be a race with mdb_env_close(),
* see https://github.com/ReOpen/ReOpenLDAP/issues/48
*/
reader = rthc->rc_reader; reader = rthc->rc_reader;
if (reader) { if (reader && reader->mr_pid == getpid()) {
mdb_ensure(NULL, reader->mr_rthc == rthc); mdb_ensure(NULL, reader->mr_rthc == rthc);
rthc->rc_reader = NULL; rthc->rc_reader = NULL;
reader->mr_rthc = NULL; reader->mr_rthc = NULL;
@ -4605,7 +4632,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if (rc == EROFS && (env->me_flags & MDB_RDONLY)) { if (rc == EROFS && (env->me_flags & MDB_RDONLY)) {
return MDB_SUCCESS; return MDB_SUCCESS;
} }
goto fail_errno; return rc;
} }
/* Lose record locks when exec*() */ /* Lose record locks when exec*() */
@ -4613,22 +4640,22 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
fcntl(env->me_lfd, F_SETFD, fdflags); fcntl(env->me_lfd, F_SETFD, fdflags);
if (!(env->me_flags & MDB_NOTLS)) { if (!(env->me_flags & MDB_NOTLS)) {
rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); rc = pthread_key_create(&env->me_txkey, mdb_env_reader_destr);
if (rc) if (rc)
goto fail; return rc;
env->me_flags |= MDB_ENV_TXKEY; env->me_flags |= MDB_ENV_TXKEY;
} }
/* Try to get exclusive lock. If we succeed, then /* Try to get exclusive lock. If we succeed, then
* nobody is using the lock region and we should initialize it. * nobody is using the lock region and we should initialize it.
*/ */
if ((rc = mdb_env_excl_lock(env, excl))) goto fail; if ((rc = mdb_env_excl_lock(env, excl))) return rc;
size = lseek(env->me_lfd, 0, SEEK_END); size = lseek(env->me_lfd, 0, SEEK_END);
if (size == -1) goto fail_errno; if (size == -1) return errno;
rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo);
if (size < rsize && *excl > 0) { if (size < rsize && *excl > 0) {
if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; if (ftruncate(env->me_lfd, rsize) != 0) return errno;
} else { } else {
rsize = size; rsize = size;
size = rsize - sizeof(MDB_txninfo); size = rsize - sizeof(MDB_txninfo);
@ -4637,9 +4664,16 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, env->me_lfd, 0); m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, env->me_lfd, 0);
if (m == MAP_FAILED) if (m == MAP_FAILED)
goto fail_errno; return errno;
env->me_txns = m; env->me_txns = m;
if (madvise(env->me_txns, rsize, MADV_DONTFORK | MADV_WILLNEED))
return errno;
#ifdef MADV_DODUMP
madvise(env->me_txns, rsize, MADV_DODUMP);
#endif
if (*excl > 0) { if (*excl > 0) {
pthread_mutexattr_t mattr; pthread_mutexattr_t mattr;
@ -4650,7 +4684,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
#endif /* MDB_USE_ROBUST */ #endif /* MDB_USE_ROBUST */
|| (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr)) || (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr))
|| (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr)))
goto fail; return rc;
pthread_mutexattr_destroy(&mattr); pthread_mutexattr_destroy(&mattr);
env->me_txns->mti_magic = MDB_MAGIC; env->me_txns->mti_magic = MDB_MAGIC;
@ -4660,27 +4694,19 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
} else { } else {
if (env->me_txns->mti_magic != MDB_MAGIC) { if (env->me_txns->mti_magic != MDB_MAGIC) {
mdb_debug("lock region has invalid magic"); mdb_debug("lock region has invalid magic");
rc = MDB_INVALID; return MDB_INVALID;
goto fail;
} }
if (env->me_txns->mti_format != MDB_LOCK_FORMAT) { if (env->me_txns->mti_format != MDB_LOCK_FORMAT) {
mdb_debug("lock region has format+version 0x%x, expected 0x%x", mdb_debug("lock region has format+version 0x%x, expected 0x%x",
env->me_txns->mti_format, MDB_LOCK_FORMAT); env->me_txns->mti_format, MDB_LOCK_FORMAT);
rc = MDB_VERSION_MISMATCH; return MDB_VERSION_MISMATCH;
goto fail;
} }
rc = errno; rc = errno;
if (rc && rc != EACCES && rc != EAGAIN) { if (rc && rc != EACCES && rc != EAGAIN)
goto fail; return rc;
}
} }
return MDB_SUCCESS; return MDB_SUCCESS;
fail_errno:
rc = errno;
fail:
return rc;
} }
/** The name of the lock file in the DB environment */ /** The name of the lock file in the DB environment */
@ -4903,20 +4929,25 @@ mdb_env_close0(MDB_env *env)
* data owned by this process (me_close_readers and * data owned by this process (me_close_readers and
* our readers), and clear each reader atomically. * our readers), and clear each reader atomically.
*/ */
mdb_ensure(env, pthread_mutex_lock(&mdb_rthc_lock) == 0); if (pid == getpid()) {
for (i = env->me_close_readers; --i >= 0; ) { mdb_ensure(env, pthread_mutex_lock(&mdb_rthc_lock) == 0);
MDB_reader *reader = &env->me_txns->mti_readers[i]; for (i = env->me_close_readers; --i >= 0; ) {
MDB_reader *reader = &env->me_txns->mti_readers[i];
if (reader->mr_pid == pid) { if (reader->mr_pid == pid) {
mdb_ensure(env, reader->mr_rthc->rc_reader == reader); struct MDB_rthc *rthc = reader->mr_rthc;
reader->mr_rthc->rc_reader = NULL; if (rthc) {
reader->mr_rthc = NULL; mdb_ensure(env, rthc->rc_reader == reader);
mdb_compiler_barrier(); rthc->rc_reader = NULL;
reader->mr_pid = 0; reader->mr_rthc = NULL;
free(rthc);
}
reader->mr_pid = 0;
}
} }
mdb_coherent_barrier();
mdb_ensure(env, pthread_mutex_unlock(&mdb_rthc_lock) == 0);
} }
mdb_coherent_barrier();
mdb_ensure(env, pthread_mutex_unlock(&mdb_rthc_lock) == 0);
munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
env->me_txns = NULL; env->me_txns = NULL;
@ -5533,6 +5564,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
return MDB_CORRUPTED; return MDB_CORRUPTED;
} }
} }
txn->mt_dirty_room++;
if (!(env->me_flags & MDB_WRITEMAP)) if (!(env->me_flags & MDB_WRITEMAP))
mdb_dpage_free(env, mp); mdb_dpage_free(env, mp);
release: release:
@ -6502,16 +6534,18 @@ fix_parent:
* update branch key if there is a parent page * update branch key if there is a parent page
*/ */
if (mc->mc_top && !mc->mc_ki[mc->mc_top]) { if (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
unsigned short top = mc->mc_top; unsigned short dtop = 1;
mc->mc_top--; mc->mc_top--;
/* slot 0 is always an empty key, find real slot */ /* slot 0 is always an empty key, find real slot */
while (mc->mc_top && !mc->mc_ki[mc->mc_top]) while (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
mc->mc_top--; mc->mc_top--;
dtop++;
}
if (mc->mc_ki[mc->mc_top]) if (mc->mc_ki[mc->mc_top])
rc2 = mdb_update_key(mc, key); rc2 = mdb_update_key(mc, key);
else else
rc2 = MDB_SUCCESS; rc2 = MDB_SUCCESS;
mc->mc_top = top; mc->mc_top += dtop;
if (rc2) if (rc2)
return rc2; return rc2;
} }
@ -6687,6 +6721,7 @@ current:
return ENOMEM; return ENOMEM;
id2.mid = pg; id2.mid = pg;
id2.mptr = np; id2.mptr = np;
/* Note - this page is already counted in parent's dirty_room */
rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
mdb_cassert(mc, rc2 == 0); mdb_cassert(mc, rc2 == 0);
if (!(flags & MDB_RESERVE)) { if (!(flags & MDB_RESERVE)) {
@ -6800,6 +6835,7 @@ put_sub:
MDB_xcursor *mx = mc->mc_xcursor; MDB_xcursor *mx = mc->mc_xcursor;
unsigned i = mc->mc_top; unsigned i = mc->mc_top;
MDB_page *mp = mc->mc_pg[i]; MDB_page *mp = mc->mc_pg[i];
int nkeys = NUMKEYS(mp);
for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
@ -6807,9 +6843,9 @@ put_sub:
if (m2->mc_pg[i] == mp) { if (m2->mc_pg[i] == mp) {
if (m2->mc_ki[i] == mc->mc_ki[i]) { if (m2->mc_ki[i] == mc->mc_ki[i]) {
mdb_xcursor_init2(m2, mx, new_dupdata); mdb_xcursor_init2(m2, mx, new_dupdata);
} else if (!insert_key) { } else if (!insert_key && m2->mc_ki[i] < nkeys) {
MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]); MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]);
if (!(n2->mn_flags & F_SUBDATA)) if ((n2->mn_flags & (F_SUBDATA|F_DUPDATA)) == F_DUPDATA)
m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2);
} }
} }
@ -7643,10 +7679,26 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
static void static void
mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst); mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst);
/** Track a temporary cursor */
#define CURSOR_TMP_TRACK(mc, mn, dummy, tracked) \
if (mc->mc_flags & C_SUB) { \
dummy.mc_flags = C_INITIALIZED; \
dummy.mc_xcursor = (MDB_xcursor *)&mn; \
tracked = &dummy; \
} else { \
tracked = &mn; \
} \
tracked->mc_next = mc->mc_txn->mt_cursors[mc->mc_dbi]; \
mc->mc_txn->mt_cursors[mc->mc_dbi] = tracked
/** Stop tracking a temporary cursor */
#define CURSOR_TMP_UNTRACK(mc, tracked) \
mc->mc_txn->mt_cursors[mc->mc_dbi] = tracked->mc_next
/** Move a node from csrc to cdst. /** Move a node from csrc to cdst.
*/ */
static int static int
mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
{ {
MDB_node *srcnode; MDB_node *srcnode;
MDB_val key, data; MDB_val key, data;
@ -7748,13 +7800,15 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
mps = csrc->mc_pg[csrc->mc_top]; mps = csrc->mc_pg[csrc->mc_top];
/* If we're adding on the left, bump others up */ /* If we're adding on the left, bump others up */
if (!cdst->mc_ki[csrc->mc_top]) { if (fromleft) {
mpd = cdst->mc_pg[csrc->mc_top]; mpd = cdst->mc_pg[csrc->mc_top];
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
if (csrc->mc_flags & C_SUB) if (csrc->mc_flags & C_SUB)
m3 = &m2->mc_xcursor->mx_cursor; m3 = &m2->mc_xcursor->mx_cursor;
else else
m3 = m2; m3 = m2;
if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top)
continue;
if (m3 != cdst && if (m3 != cdst &&
m3->mc_pg[csrc->mc_top] == mpd && m3->mc_pg[csrc->mc_top] == mpd &&
m3->mc_ki[csrc->mc_top] >= cdst->mc_ki[csrc->mc_top]) { m3->mc_ki[csrc->mc_top] >= cdst->mc_ki[csrc->mc_top]) {
@ -7777,6 +7831,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
else else
m3 = m2; m3 = m2;
if (m3 == csrc) continue; if (m3 == csrc) continue;
if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top)
continue;
if (m3->mc_pg[csrc->mc_top] == mps) { if (m3->mc_pg[csrc->mc_top] == mps) {
if (!m3->mc_ki[csrc->mc_top]) { if (!m3->mc_ki[csrc->mc_top]) {
m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
@ -7794,6 +7850,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
*/ */
if (csrc->mc_ki[csrc->mc_top] == 0) { if (csrc->mc_ki[csrc->mc_top] == 0) {
if (csrc->mc_ki[csrc->mc_top-1] != 0) { if (csrc->mc_ki[csrc->mc_top-1] != 0) {
MDB_cursor dummy, *tracked;
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
} else { } else {
@ -7806,7 +7863,11 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
mdb_cursor_copy(csrc, &mn); mdb_cursor_copy(csrc, &mn);
mn.mc_snum--; mn.mc_snum--;
mn.mc_top--; mn.mc_top--;
if (unlikely((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS)) /* We want mdb_rebalance to find mn when doing fixups */
CURSOR_TMP_TRACK(csrc, mn, dummy, tracked);
rc = mdb_update_key(&mn, &key);
CURSOR_TMP_UNTRACK(csrc, tracked);
if (unlikely(rc != MDB_SUCCESS))
return rc; return rc;
} }
if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
@ -7822,6 +7883,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
if (cdst->mc_ki[cdst->mc_top] == 0) { if (cdst->mc_ki[cdst->mc_top] == 0) {
if (cdst->mc_ki[cdst->mc_top-1] != 0) { if (cdst->mc_ki[cdst->mc_top-1] != 0) {
MDB_cursor dummy, *tracked;
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size); key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size);
} else { } else {
@ -7834,7 +7896,11 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
mdb_cursor_copy(cdst, &mn); mdb_cursor_copy(cdst, &mn);
mn.mc_snum--; mn.mc_snum--;
mn.mc_top--; mn.mc_top--;
if (unlikely((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS)) /* We want mdb_rebalance to find mn when doing fixups */
CURSOR_TMP_TRACK(cdst, mn, dummy, tracked);
rc = mdb_update_key(&mn, &key);
CURSOR_TMP_UNTRACK(cdst, tracked);
if (unlikely(rc != MDB_SUCCESS))
return rc; return rc;
} }
if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) { if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) {
@ -7974,6 +8040,9 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
m3->mc_pg[top] = pdst; m3->mc_pg[top] = pdst;
m3->mc_ki[top] += nkeys; m3->mc_ki[top] += nkeys;
m3->mc_ki[top-1] = cdst->mc_ki[top-1]; m3->mc_ki[top-1] = cdst->mc_ki[top-1];
} else if (m3->mc_pg[top-1] == csrc->mc_pg[top-1] &&
m3->mc_ki[top-1] > csrc->mc_ki[top-1]) {
m3->mc_ki[top-1]--;
} }
} }
} }
@ -8023,7 +8092,7 @@ static int
mdb_rebalance(MDB_cursor *mc) mdb_rebalance(MDB_cursor *mc)
{ {
MDB_node *node; MDB_node *node;
int rc; int rc, fromleft;
unsigned ptop, minkeys, thresh; unsigned ptop, minkeys, thresh;
MDB_cursor mn; MDB_cursor mn;
indx_t oldki; indx_t oldki;
@ -8074,7 +8143,8 @@ mdb_rebalance(MDB_cursor *mc)
m3 = &m2->mc_xcursor->mx_cursor; m3 = &m2->mc_xcursor->mx_cursor;
else else
m3 = m2; m3 = m2;
if (m3->mc_snum < mc->mc_snum) continue; if (!(m3->mc_flags & C_INITIALIZED) || (m3->mc_snum < mc->mc_snum))
continue;
if (m3->mc_pg[0] == mp) { if (m3->mc_pg[0] == mp) {
m3->mc_snum = 0; m3->mc_snum = 0;
m3->mc_top = 0; m3->mc_top = 0;
@ -8110,6 +8180,8 @@ mdb_rebalance(MDB_cursor *mc)
else else
m3 = m2; m3 = m2;
if (m3 == mc) continue; if (m3 == mc) continue;
if (!(m3->mc_flags & C_INITIALIZED))
continue;
if (m3->mc_pg[0] == mp) { if (m3->mc_pg[0] == mp) {
for (i=0; i<mc->mc_db->md_depth; i++) { for (i=0; i<mc->mc_db->md_depth; i++) {
m3->mc_pg[i] = m3->mc_pg[i+1]; m3->mc_pg[i] = m3->mc_pg[i+1];
@ -8153,6 +8225,7 @@ mdb_rebalance(MDB_cursor *mc)
return rc; return rc;
mn.mc_ki[mn.mc_top] = 0; mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]);
fromleft = 0;
} else { } else {
/* There is at least one neighbor to the left. /* There is at least one neighbor to the left.
*/ */
@ -8164,6 +8237,7 @@ mdb_rebalance(MDB_cursor *mc)
return rc; return rc;
mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1;
mc->mc_ki[mc->mc_top] = 0; mc->mc_ki[mc->mc_top] = 0;
fromleft = 1;
} }
mdb_debug("found neighbor page %zu (%u keys, %.1f%% full)", mdb_debug("found neighbor page %zu (%u keys, %.1f%% full)",
@ -8175,32 +8249,22 @@ mdb_rebalance(MDB_cursor *mc)
* (A branch page must never have less than 2 keys.) * (A branch page must never have less than 2 keys.)
*/ */
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) { if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
rc = mdb_node_move(&mn, mc); rc = mdb_node_move(&mn, mc, fromleft);
if (!mc->mc_ki[mc->mc_top]) { if (fromleft) {
/* if we inserted on left, bump position up */ /* if we inserted on left, bump position up */
oldki++; oldki++;
} }
} else { } else {
if (mc->mc_ki[ptop] == 0) { if (!fromleft) {
rc = mdb_page_merge(&mn, mc); rc = mdb_page_merge(&mn, mc);
} else { } else {
MDB_cursor dummy; MDB_cursor dummy, *tracked;
oldki += NUMKEYS(mn.mc_pg[mn.mc_top]); oldki += NUMKEYS(mn.mc_pg[mn.mc_top]);
mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1; mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
/* We want mdb_rebalance to find mn when doing fixups */ /* We want mdb_rebalance to find mn when doing fixups */
if (mc->mc_flags & C_SUB) { CURSOR_TMP_TRACK(mc, mn, dummy, tracked);
dummy.mc_next = mc->mc_txn->mt_cursors[mc->mc_dbi];
mc->mc_txn->mt_cursors[mc->mc_dbi] = &dummy;
dummy.mc_xcursor = (MDB_xcursor *)&mn;
} else {
mn.mc_next = mc->mc_txn->mt_cursors[mc->mc_dbi];
mc->mc_txn->mt_cursors[mc->mc_dbi] = &mn;
}
rc = mdb_page_merge(mc, &mn); rc = mdb_page_merge(mc, &mn);
if (mc->mc_flags & C_SUB) CURSOR_TMP_UNTRACK(mc, tracked);
mc->mc_txn->mt_cursors[mc->mc_dbi] = dummy.mc_next;
else
mc->mc_txn->mt_cursors[mc->mc_dbi] = mn.mc_next;
mdb_cursor_copy(&mn, mc); mdb_cursor_copy(&mn, mc);
} }
mc->mc_flags &= ~C_EOF; mc->mc_flags &= ~C_EOF;
@ -8238,7 +8302,7 @@ mdb_cursor_del0(MDB_cursor *mc)
if (m3->mc_ki[mc->mc_top] > ki) if (m3->mc_ki[mc->mc_top] > ki)
m3->mc_ki[mc->mc_top]--; m3->mc_ki[mc->mc_top]--;
else if (mc->mc_db->md_flags & MDB_DUPSORT) else if (mc->mc_db->md_flags & MDB_DUPSORT)
m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF; m3->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
} }
} }
} }
@ -8391,12 +8455,19 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
rp->mp_ksize = mp->mp_ksize; rp->mp_ksize = mp->mp_ksize;
mdb_debug("new right sibling: page %zu", rp->mp_pgno); mdb_debug("new right sibling: page %zu", rp->mp_pgno);
if (mc->mc_snum < 2) { /* Usually when splitting the root page, the cursor
* height is 1. But when called from mdb_update_key,
* the cursor height may be greater because it walks
* up the stack while finding the branch slot to update.
*/
if (mc->mc_top < 1) {
if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp))) if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
goto done; goto done;
/* shift current top to make room for new parent */ /* shift current top to make room for new parent */
mc->mc_pg[1] = mc->mc_pg[0]; for (i=mc->mc_snum; i>0; i--) {
mc->mc_ki[1] = mc->mc_ki[0]; mc->mc_pg[i] = mc->mc_pg[i-1];
mc->mc_ki[i] = mc->mc_ki[i-1];
}
mc->mc_pg[0] = pp; mc->mc_pg[0] = pp;
mc->mc_ki[0] = 0; mc->mc_ki[0] = 0;
mc->mc_db->md_root = pp->mp_pgno; mc->mc_db->md_root = pp->mp_pgno;
@ -8412,15 +8483,14 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
mc->mc_db->md_depth--; mc->mc_db->md_depth--;
goto done; goto done;
} }
mc->mc_snum = 2; mc->mc_snum++;
mc->mc_top = 1; mc->mc_top++;
ptop = 0; ptop = 0;
} else { } else {
ptop = mc->mc_top-1; ptop = mc->mc_top-1;
mdb_debug("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno); mdb_debug("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno);
} }
mc->mc_flags |= C_SPLITTING;
mdb_cursor_copy(mc, &mn); mdb_cursor_copy(mc, &mn);
mn.mc_pg[mn.mc_top] = rp; mn.mc_pg[mn.mc_top] = rp;
mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; mn.mc_ki[ptop] = mc->mc_ki[ptop]+1;
@ -8471,8 +8541,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
rp->mp_lower += sizeof(indx_t); rp->mp_lower += sizeof(indx_t);
rp->mp_upper -= ksize - sizeof(indx_t); rp->mp_upper -= ksize - sizeof(indx_t);
mc->mc_ki[mc->mc_top] = x; mc->mc_ki[mc->mc_top] = x;
mc->mc_pg[mc->mc_top] = rp;
mc->mc_ki[ptop]++;
} }
} else { } else {
int psize, nsize, k; int psize, nsize, k;
@ -8565,21 +8633,20 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
/* Copy separator key to the parent. /* Copy separator key to the parent.
*/ */
if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(env, &sepkey)) { if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(env, &sepkey)) {
int snum = mc->mc_snum;
MDB_cursor dummy, *tracked;
mn.mc_snum--; mn.mc_snum--;
mn.mc_top--; mn.mc_top--;
did_split = 1; did_split = 1;
/* We want other splits to find mn when doing fixups */
CURSOR_TMP_TRACK(mc, mn, dummy, tracked);
rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0); rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
if (unlikely(rc)) CURSOR_TMP_UNTRACK(mc, tracked);
if (unlikely(rc != MDB_SUCCESS))
goto done; goto done;
/* root split? */ /* root split? */
if (mn.mc_snum == mc->mc_snum) { if (mc->mc_snum > snum) {
mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top];
mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top];
mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop];
mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop];
mc->mc_snum++;
mc->mc_top++;
ptop++; ptop++;
} }
/* Right page might now have changed parent. /* Right page might now have changed parent.
@ -8605,10 +8672,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0); rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0);
mn.mc_top++; mn.mc_top++;
} }
mc->mc_flags ^= C_SPLITTING; if (unlikely(rc != MDB_SUCCESS))
if (unlikely(rc != MDB_SUCCESS)) {
goto done; goto done;
}
if (nflags & MDB_APPEND) { if (nflags & MDB_APPEND) {
mc->mc_pg[mc->mc_top] = rp; mc->mc_pg[mc->mc_top] = rp;
mc->mc_ki[mc->mc_top] = 0; mc->mc_ki[mc->mc_top] = 0;
@ -8675,11 +8740,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
/* reset back to original page */ /* reset back to original page */
if (newindx < split_indx) { if (newindx < split_indx) {
mc->mc_pg[mc->mc_top] = mp; mc->mc_pg[mc->mc_top] = mp;
if (nflags & MDB_RESERVE) {
node = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
if (!(node->mn_flags & F_BIGDATA))
newdata->mv_data = NODEDATA(node);
}
} else { } else {
mc->mc_pg[mc->mc_top] = rp; mc->mc_pg[mc->mc_top] = rp;
mc->mc_ki[ptop]++; mc->mc_ki[ptop]++;
@ -8693,13 +8753,32 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
} }
} }
} }
if (nflags & MDB_RESERVE) {
node = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (!(node->mn_flags & F_BIGDATA))
newdata->mv_data = NODEDATA(node);
}
} else {
if (newindx >= split_indx) {
mc->mc_pg[mc->mc_top] = rp;
mc->mc_ki[ptop]++;
/* Make sure mc_ki is still valid.
*/
if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
for (i=0; i<=ptop; i++) {
mc->mc_pg[i] = mn.mc_pg[i];
mc->mc_ki[i] = mn.mc_ki[i];
}
}
}
} }
{ {
/* Adjust other cursors pointing to mp */ /* Adjust other cursors pointing to mp */
MDB_cursor *m2, *m3; MDB_cursor *m2, *m3;
MDB_dbi dbi = mc->mc_dbi; MDB_dbi dbi = mc->mc_dbi;
int fixup = NUMKEYS(mp); nkeys = NUMKEYS(mp);
for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
if (mc->mc_flags & C_SUB) if (mc->mc_flags & C_SUB)
@ -8710,16 +8789,17 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
continue; continue;
if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
continue; continue;
if (m3->mc_flags & C_SPLITTING)
continue;
if (new_root) { if (new_root) {
int k; int k;
/* sub cursors may be on different DB */
if (m3->mc_pg[0] != mp)
continue;
/* root split */ /* root split */
for (k=new_root; k>=0; k--) { for (k=new_root; k>=0; k--) {
m3->mc_ki[k+1] = m3->mc_ki[k]; m3->mc_ki[k+1] = m3->mc_ki[k];
m3->mc_pg[k+1] = m3->mc_pg[k]; m3->mc_pg[k+1] = m3->mc_pg[k];
} }
if (m3->mc_ki[0] >= split_indx) { if (m3->mc_ki[0] > nkeys) {
m3->mc_ki[0] = 1; m3->mc_ki[0] = 1;
} else { } else {
m3->mc_ki[0] = 0; m3->mc_ki[0] = 0;
@ -8731,10 +8811,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
if (m3->mc_top >= mc->mc_top && m3->mc_pg[mc->mc_top] == mp) { if (m3->mc_top >= mc->mc_top && m3->mc_pg[mc->mc_top] == mp) {
if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE)) if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE))
m3->mc_ki[mc->mc_top]++; m3->mc_ki[mc->mc_top]++;
if (m3->mc_ki[mc->mc_top] >= fixup) { if (m3->mc_ki[mc->mc_top] >= nkeys) {
m3->mc_pg[mc->mc_top] = rp; m3->mc_pg[mc->mc_top] = rp;
m3->mc_ki[mc->mc_top] -= fixup; m3->mc_ki[mc->mc_top] -= nkeys;
m3->mc_ki[ptop] = mn.mc_ki[ptop]; for (i=0; i<mc->mc_top; i++) {
m3->mc_ki[i] = mn.mc_ki[i];
m3->mc_pg[i] = mn.mc_pg[i];
}
} }
} else if (!did_split && m3->mc_top >= ptop && m3->mc_pg[ptop] == mc->mc_pg[ptop] && } else if (!did_split && m3->mc_top >= ptop && m3->mc_pg[ptop] == mc->mc_pg[ptop] &&
m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { m3->mc_ki[ptop] >= mc->mc_ki[ptop]) {
@ -9151,7 +9234,9 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
return rc; return rc;
/* We must start the actual read txn after blocking writers */ /* We must start the actual read txn after blocking writers */
mdb_txn_end(txn, MDB_END_RESET_TMP); rc = mdb_txn_end(txn, MDB_END_RESET_TMP);
if (rc)
return rc;
/* Temporarily block writers until we snapshot the meta pages */ /* Temporarily block writers until we snapshot the meta pages */
wmutex = MDB_MUTEX(env, w); wmutex = MDB_MUTEX(env, w);
@ -9996,6 +10081,11 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
pid_t *pids, pid; pid_t *pids, pid;
int rc = MDB_SUCCESS, count = 0; int rc = MDB_SUCCESS, count = 0;
if (unlikely(env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
rdrs = env->me_txns->mti_numreaders; rdrs = env->me_txns->mti_numreaders;
pids = malloc((rdrs+1) * sizeof(pid_t)); pids = malloc((rdrs+1) * sizeof(pid_t));
if (!pids) if (!pids)

View File

@ -122,27 +122,42 @@ error(const char* msg, ...) {
} }
} }
static void pagemap_cleanup(void) {
int i;
for( i = 1; i < MAX_DBI; ++i ) {
if (walk.dbi_names[i]) {
free((void *) walk.dbi_names[i]);
walk.dbi_names[i] = NULL;
}
}
free(walk.pagemap);
walk.pagemap = NULL;
}
static int pagemap_lookup_dbi(const char* dbi) { static int pagemap_lookup_dbi(const char* dbi) {
static int last; static int last;
int i;
if (last > 0 && strcmp(walk.dbi_names[last], dbi) == 0) if (last > 0 && strcmp(walk.dbi_names[last], dbi) == 0)
return last; return last;
for(last = 1; walk.dbi_names[last] && last < MAX_DBI; ++last) for(i = 1; walk.dbi_names[i] && last < MAX_DBI; ++i)
if (strcmp(walk.dbi_names[last], dbi) == 0) if (strcmp(walk.dbi_names[i], dbi) == 0)
return last; return last = i;
if (last == MAX_DBI) if (i == MAX_DBI)
return last = -1; return -1;
walk.dbi_names[last] = strdup(dbi); walk.dbi_names[i] = strdup(dbi);
if (verbose > 1) { if (verbose > 1) {
print(" - found '%s' area\n", dbi); print(" - found '%s' area\n", dbi);
fflush(NULL); fflush(NULL);
} }
return last; return last = i;
} }
static void problem_add(const char* object, size_t entry_number, const char* msg, const char *extra, ...) { static void problem_add(const char* object, size_t entry_number, const char* msg, const char *extra, ...) {
@ -581,6 +596,8 @@ int main(int argc, char *argv[])
struct timespec timestamp_start, timestamp_finish; struct timespec timestamp_start, timestamp_finish;
double elapsed; double elapsed;
atexit(pagemap_cleanup);
if (clock_gettime(CLOCK_MONOTONIC, &timestamp_start)) { if (clock_gettime(CLOCK_MONOTONIC, &timestamp_start)) {
rc = errno; rc = errno;
error("clock_gettime failed, error %d %s\n", rc, mdbx_strerror(rc)); error("clock_gettime failed, error %d %s\n", rc, mdbx_strerror(rc));
@ -897,7 +914,6 @@ bailout:
mdbx_txn_abort(locktxn); mdbx_txn_abort(locktxn);
if (env) if (env)
mdbx_env_close(env); mdbx_env_close(env);
free(walk.pagemap);
fflush(NULL); fflush(NULL);
if (rc) { if (rc) {
if (rc < 0) if (rc < 0)

View File

@ -332,7 +332,7 @@ int main(int argc, char *argv[])
putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
break; break;
case 'T': case 'T':
mode |= NOHDR; mode |= NOHDR | PRINT;
break; break;
default: default:
usage(); usage();
@ -399,20 +399,22 @@ int main(int argc, char *argv[])
while(1) { while(1) {
rc = readline(&key, &kbuf); rc = readline(&key, &kbuf);
if (rc == EOF) if (rc) /* rc == EOF */
break; break;
if (rc)
goto txn_abort;
rc = readline(&data, &dbuf); rc = readline(&data, &dbuf);
if (rc) if (rc) {
fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno);
goto txn_abort; goto txn_abort;
}
rc = mdb_cursor_put(mc, &key, &data, putflags); rc = mdb_cursor_put(mc, &key, &data, putflags);
if (rc == MDB_KEYEXIST && putflags) if (rc == MDB_KEYEXIST && putflags)
continue; continue;
if (rc) if (rc) {
fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
goto txn_abort; goto txn_abort;
}
batch++; batch++;
if (batch == 100) { if (batch == 100) {
rc = mdb_txn_commit(txn); rc = mdb_txn_commit(txn);

View File

@ -159,6 +159,7 @@ int main(int argc,char * argv[])
mdb_dbi_close(env, dbi); mdb_dbi_close(env, dbi);
#endif #endif
mdb_env_close(env); mdb_env_close(env);
free(sval);
return 0; return 0;
} }