Merge branch 'devel'

This commit is contained in:
Leo Yuriev 2015-11-06 17:50:15 +03:00
commit f32dab22df
3 changed files with 153 additions and 58 deletions

15
CHANGES
View File

@ -3,10 +3,25 @@ LMDB 0.9 Change Log
LMDB 0.9.17 Release Engineering LMDB 0.9.17 Release Engineering
Fix ITS#7377 catch calloc failure Fix ITS#7377 catch calloc failure
Fix ITS#8237 regression from ITS#7589 Fix ITS#8237 regression from ITS#7589
Fix ITS#8238 page_split for DUPFIXED pages
Fix ITS#8221 MDB_PAGE_FULL on delete/rebalance Fix ITS#8221 MDB_PAGE_FULL on delete/rebalance
Fix ITS#8258 rebalance/split assert Fix ITS#8258 rebalance/split assert
Fix ITS#8263 cursor_put cursor tracking
Fix ITS#8264 cursor_del cursor tracking
Fix ITS#8299 mdb_del cursor tracking
Fix ITS#8300 mdb_del cursor tracking
Fix ITS#7771 fakepage cursor tracking
Fix ITS#7789 ensure mapsize >= pages in use
Fix ITS#7971 mdb_txn_renew0() new reader slots
Fix ITS#7969 use __sync_synchronize on non-x86
Added mdb_txn_id() (ITS#7994)
Added robust mutex support
Miscellaneous cleanup/simplification
Build Build
Create install dirs if needed (ITS#8256) Create install dirs if needed (ITS#8256)
Fix ThreadProc decl on Win32/MSVC (ITS#8270)
Added ssize_t typedef for MSVC (ITS#8067)
Use ANSI apis on Windows (ITS#8069)
LMDB 0.9.16 Release (2015/08/14) LMDB 0.9.16 Release (2015/08/14)
Fix cursor EOF bug (ITS#8190) Fix cursor EOF bug (ITS#8190)

View File

@ -1,25 +1,22 @@
# Makefile for liblmdb (Lightning memory-mapped database library). # Makefile for libmdbx (lightning memory-mapped database library for Linux).
######################################################################## ########################################################################
# Configuration. The compiler options must enable threaded compilation. # Configuration. The compiler options must enable threaded compilation.
# #
# Preprocessor macros (for CPPFLAGS) of interest... # Preprocessor macros (for XCFLAGS) of interest...
# Note that the defaults should already be correct for most # Note that the defaults should already be correct for most
# platforms; you should not need to change any of these. # platforms; you should not need to change any of these.
# Read their descriptions in mdb.c if you do: # Read their descriptions in mdb.c if you do:
# #
# - MDB_USE_POSIX_SEM # - MDB_USE_ROBUST
# - MDB_DSYNC
# - MDB_FDATASYNC
# - MDB_FDATASYNC_WORKS
# - MDB_USE_PWRITEV
# #
# There may be other macros in mdb.c of interest. You should # There may be other macros in mdb.c of interest. You should
# read mdb.c before changing any of them. # read mdb.c before changing any of them.
# #
CC ?= gcc CC ?= gcc
XCFLAGS ?=
CFLAGS ?= -O2 -g -Wall -Werror -Wno-unused-parameter CFLAGS ?= -O2 -g -Wall -Werror -Wno-unused-parameter
CFLAGS += -pthread CFLAGS += -pthread $(XCFLAGS)
prefix ?= /usr/local prefix ?= /usr/local
######################################################################## ########################################################################

183
mdb.c
View File

@ -142,6 +142,18 @@
# define mdb_func_ "<mdb_unknown>" # define mdb_func_ "<mdb_unknown>"
#endif #endif
/** Compile-time switch for robust mutex usage: 1 to enable, 0 to disable.
* If the build does not predefine MDB_USE_ROBUST, it defaults to 1 only when
* both EOWNERDEAD and PTHREAD_MUTEX_ROBUST are defined and ANDROID is not;
* otherwise it defaults to 0.
* Some platforms define the EOWNERDEAD error code
* even though they don't support Robust Mutexes.
* On such platforms, compile with -DMDB_USE_ROBUST=0.
*/
#ifndef MDB_USE_ROBUST
# if defined(EOWNERDEAD) && defined(PTHREAD_MUTEX_ROBUST) && !defined(ANDROID)
# define MDB_USE_ROBUST 1
# else
# define MDB_USE_ROBUST 0
# endif
#endif /* MDB_USE_ROBUST */
/* Internal error codes, not exposed outside liblmdb */ /* Internal error codes, not exposed outside liblmdb */
#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10) #define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10)
@ -1065,6 +1077,7 @@ static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data);
static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx);
static void mdb_xcursor_init0(MDB_cursor *mc); static void mdb_xcursor_init0(MDB_cursor *mc);
static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node);
static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force);
static int mdb_drop0(MDB_cursor *mc, int subs); static int mdb_drop0(MDB_cursor *mc, int subs);
static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi);
@ -4626,9 +4639,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if ((rc = pthread_mutexattr_init(&mattr)) if ((rc = pthread_mutexattr_init(&mattr))
|| (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED))
#ifdef EOWNERDEAD #if MDB_USE_ROBUST
|| (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)) || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST))
#endif #endif /* MDB_USE_ROBUST */
|| (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr)) || (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr))
|| (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr)))
goto fail; goto fail;
@ -6312,7 +6325,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
{ {
MDB_env *env; MDB_env *env;
MDB_node *leaf = NULL; MDB_node *leaf = NULL;
MDB_page *fp, *mp; MDB_page *fp, *mp, *sub_root = NULL;
uint16_t fp_flags; uint16_t fp_flags;
MDB_val xdata, *rdata, dkey, olddata; MDB_val xdata, *rdata, dkey, olddata;
MDB_db dummy; MDB_db dummy;
@ -6604,6 +6617,7 @@ prep_subDB:
offset = env->me_psize - olddata.mv_size; offset = env->me_psize - olddata.mv_size;
flags |= F_DUPDATA|F_SUBDATA; flags |= F_DUPDATA|F_SUBDATA;
dummy.md_root = mp->mp_pgno; dummy.md_root = mp->mp_pgno;
sub_root = mp;
} }
if (mp != fp) { if (mp != fp) {
mp->mp_flags = fp_flags | P_DIRTY; mp->mp_flags = fp_flags | P_DIRTY;
@ -6750,7 +6764,7 @@ new_sub:
* DB are all zero size. * DB are all zero size.
*/ */
if (do_sub) { if (do_sub) {
int xflags; int xflags, new_dupdata;
size_t ecount; size_t ecount;
put_sub: put_sub:
xdata.mv_size = 0; xdata.mv_size = 0;
@ -6763,27 +6777,37 @@ put_sub:
xflags = (flags & MDB_NODUPDATA) ? xflags = (flags & MDB_NODUPDATA) ?
MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL; MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL;
} }
if (sub_root)
mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root;
new_dupdata = (int)dkey.mv_size;
/* converted, write the original data first */ /* converted, write the original data first */
if (dkey.mv_size) { if (dkey.mv_size) {
rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
if (unlikely(rc)) if (unlikely(rc))
goto bad_sub; goto bad_sub;
{ /* we've done our job */
/* Adjust other cursors pointing to mp */ dkey.mv_size = 0;
MDB_cursor *m2; }
unsigned i = mc->mc_top; if (!(leaf->mn_flags & F_SUBDATA) || sub_root) {
MDB_page *mp = mc->mc_pg[i]; /* Adjust other cursors pointing to mp */
MDB_cursor *m2;
MDB_xcursor *mx = mc->mc_xcursor;
unsigned i = mc->mc_top;
MDB_page *mp = mc->mc_pg[i];
for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
if (!(m2->mc_flags & C_INITIALIZED)) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue;
if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { if (m2->mc_pg[i] == mp) {
mdb_xcursor_init1(m2, leaf); if (m2->mc_ki[i] == mc->mc_ki[i]) {
mdb_xcursor_init2(m2, mx, new_dupdata);
} else if (!insert_key) {
MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]);
if (!(n2->mn_flags & F_SUBDATA))
m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2);
} }
} }
} }
/* we've done our job */
dkey.mv_size = 0;
} }
ecount = mc->mc_xcursor->mx_db.md_entries; ecount = mc->mc_xcursor->mx_db.md_entries;
if (flags & MDB_APPENDDUP) if (flags & MDB_APPENDDUP)
@ -6885,12 +6909,19 @@ mdb_cursor_del(MDB_cursor *mc, unsigned flags)
mdb_node_shrink(mp, mc->mc_ki[mc->mc_top]); mdb_node_shrink(mp, mc->mc_ki[mc->mc_top]);
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
/* fix other sub-DB cursors pointed at this fake page */ /* fix other sub-DB cursors pointed at fake pages on this page */
for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
if (m2->mc_pg[mc->mc_top] == mp && if (!(m2->mc_flags & C_INITIALIZED)) continue;
m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) if (m2->mc_pg[mc->mc_top] == mp) {
m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); if (m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) {
m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
} else {
MDB_node *n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]);
if (!(n2->mn_flags & F_SUBDATA))
m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2);
}
}
} }
} }
mc->mc_db->md_entries--; mc->mc_db->md_entries--;
@ -7335,6 +7366,38 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
mc->mc_signature = MDBX_MC_SIGNATURE; mc->mc_signature = MDBX_MC_SIGNATURE;
} }
/** Bring a sorted-dups cursor back in sync after an underlying update.
 * Refreshes the fields that depend on the main cursor's data by copying
 * them from an xcursor that is already up to date. Close to init1, except
 * that the initialization steps are skipped when the xcursor had already
 * been used.
 * @param[in] mc The main cursor whose sorted-dups cursor is to be fixed up.
 * @param[in] src_mx The xcursor of an up-to-date cursor.
 * @param[in] new_dupdata True if converting from a non-#F_DUPDATA item.
 */
static void
mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata)
{
	MDB_xcursor *dst_mx = mc->mc_xcursor;
	MDB_cursor *sub = &dst_mx->mx_cursor;

	if (!new_dupdata) {
		/* A sub-cursor not flagged C_INITIALIZED has nothing to refresh. */
		if (!(sub->mc_flags & C_INITIALIZED))
			return;
	} else {
		/* Newly converted item: reset to a one-level stack at index 0. */
		sub->mc_ki[0] = 0;
		sub->mc_top = 0;
		sub->mc_snum = 1;
		sub->mc_flags |= C_INITIALIZED;
		/* DB_DIRTY guides mdb_cursor_touch */
		dst_mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY;
#if UINT_MAX < SIZE_MAX
		dst_mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp;
#endif
	}

	/* Adopt the up-to-date sub-DB record and its root page pointer. */
	sub->mc_pg[0] = src_mx->mx_cursor.mc_pg[0];
	dst_mx->mx_db = src_mx->mx_db;
	mdb_debug("Sub-db -%u root page %zu", sub->mc_dbi,
		dst_mx->mx_db.md_root);
}
/** Initialize a cursor for a given transaction and database. */ /** Initialize a cursor for a given transaction and database. */
static void static void
mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
@ -7675,32 +7738,48 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
/* Adjust other cursors pointing to mp */ /* Adjust other cursors pointing to mp */
MDB_cursor *m2, *m3; MDB_cursor *m2, *m3;
MDB_dbi dbi = csrc->mc_dbi; MDB_dbi dbi = csrc->mc_dbi;
MDB_page *mp; MDB_page *mpd, *mps;
mp = cdst->mc_pg[csrc->mc_top]; mps = csrc->mc_pg[csrc->mc_top];
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { /* If we're adding on the left, bump others up */
if (csrc->mc_flags & C_SUB) if (!cdst->mc_ki[csrc->mc_top]) {
m3 = &m2->mc_xcursor->mx_cursor; mpd = cdst->mc_pg[csrc->mc_top];
else for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
m3 = m2; if (csrc->mc_flags & C_SUB)
if (m3 == cdst) continue; m3 = &m2->mc_xcursor->mx_cursor;
if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] >= else
cdst->mc_ki[csrc->mc_top]) { m3 = m2;
m3->mc_ki[csrc->mc_top]++; if (m3 != cdst &&
m3->mc_pg[csrc->mc_top] == mpd &&
m3->mc_ki[csrc->mc_top] >= cdst->mc_ki[csrc->mc_top]) {
m3->mc_ki[csrc->mc_top]++;
}
if (m3 !=csrc &&
m3->mc_pg[csrc->mc_top] == mps &&
m3->mc_ki[csrc->mc_top] == csrc->mc_ki[csrc->mc_top]) {
m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
m3->mc_ki[csrc->mc_top-1]++;
}
} }
} } else
/* Adding on the right, bump others down */
mp = csrc->mc_pg[csrc->mc_top]; {
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
if (csrc->mc_flags & C_SUB) if (csrc->mc_flags & C_SUB)
m3 = &m2->mc_xcursor->mx_cursor; m3 = &m2->mc_xcursor->mx_cursor;
else else
m3 = m2; m3 = m2;
if (m3 == csrc) continue; if (m3 == csrc) continue;
if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == if (m3->mc_pg[csrc->mc_top] == mps) {
csrc->mc_ki[csrc->mc_top]) { if (!m3->mc_ki[csrc->mc_top]) {
m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
m3->mc_ki[csrc->mc_top-1]--;
} else {
m3->mc_ki[csrc->mc_top]--;
}
}
} }
} }
} }
@ -7873,6 +7952,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
/* Adjust other cursors pointing to mp */ /* Adjust other cursors pointing to mp */
MDB_cursor *m2, *m3; MDB_cursor *m2, *m3;
MDB_dbi dbi = csrc->mc_dbi; MDB_dbi dbi = csrc->mc_dbi;
unsigned top = csrc->mc_top;
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
if (csrc->mc_flags & C_SUB) if (csrc->mc_flags & C_SUB)
@ -7881,9 +7961,10 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
m3 = m2; m3 = m2;
if (m3 == csrc) continue; if (m3 == csrc) continue;
if (m3->mc_snum < csrc->mc_snum) continue; if (m3->mc_snum < csrc->mc_snum) continue;
if (m3->mc_pg[csrc->mc_top] == psrc) { if (m3->mc_pg[top] == psrc) {
m3->mc_pg[csrc->mc_top] = pdst; m3->mc_pg[top] = pdst;
m3->mc_ki[csrc->mc_top] += nkeys; m3->mc_ki[top] += nkeys;
m3->mc_ki[top-1] = cdst->mc_ki[top-1];
} }
} }
} }
@ -8086,7 +8167,8 @@ mdb_rebalance(MDB_cursor *mc)
*/ */
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) { if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
rc = mdb_node_move(&mn, mc); rc = mdb_node_move(&mn, mc);
if (mc->mc_ki[mc->mc_top-1]) { if (!mc->mc_ki[mc->mc_top]) {
/* if we inserted on left, bump position up */
oldki++; oldki++;
} }
} else { } else {
@ -8381,6 +8463,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
rp->mp_upper -= ksize - sizeof(indx_t); rp->mp_upper -= ksize - sizeof(indx_t);
mc->mc_ki[mc->mc_top] = x; mc->mc_ki[mc->mc_top] = x;
mc->mc_pg[mc->mc_top] = rp; mc->mc_pg[mc->mc_top] = rp;
mc->mc_ki[ptop]++;
} }
} else { } else {
int psize, nsize, k; int psize, nsize, k;
@ -9952,7 +10035,7 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
static int __cold static int __cold
mdb_mutex_failed(MDB_env *env, pthread_mutex_t *mutex, int rc) mdb_mutex_failed(MDB_env *env, pthread_mutex_t *mutex, int rc)
{ {
#ifdef EOWNERDEAD #if MDB_USE_ROBUST
if (unlikely(rc == EOWNERDEAD)) { if (unlikely(rc == EOWNERDEAD)) {
int rlocked, rc2; int rlocked, rc2;
@ -9987,7 +10070,7 @@ mdb_mutex_failed(MDB_env *env, pthread_mutex_t *mutex, int rc)
pthread_mutex_unlock(mutex); pthread_mutex_unlock(mutex);
} }
} }
#endif /* EOWNERDEAD */ #endif /* MDB_USE_ROBUST */
if (unlikely(rc)) { if (unlikely(rc)) {
mdb_debug("lock mutex failed, %s", mdb_strerror(rc)); mdb_debug("lock mutex failed, %s", mdb_strerror(rc));
if (rc != EDEADLK) { if (rc != EDEADLK) {