From f6045ae77e8515d4f71b42ab1a64d35eca6cee2e Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 7 Oct 2015 06:30:58 +0100 Subject: [PATCH 1/7] lmdb: ITS#8263 fix cursor tracking in cursor_put. Includes ITS#8263 streamline prev patch. --- mdb.c | 70 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/mdb.c b/mdb.c index 337bb6aa..09205868 100644 --- a/mdb.c +++ b/mdb.c @@ -1059,6 +1059,7 @@ static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); static void mdb_xcursor_init0(MDB_cursor *mc); static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); +static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force); static int mdb_drop0(MDB_cursor *mc, int subs); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); @@ -6202,7 +6203,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, { MDB_env *env; MDB_node *leaf = NULL; - MDB_page *fp, *mp; + MDB_page *fp, *mp, *sub_root = NULL; uint16_t fp_flags; MDB_val xdata, *rdata, dkey, olddata; MDB_db dummy; @@ -6491,6 +6492,7 @@ prep_subDB: offset = env->me_psize - olddata.mv_size; flags |= F_DUPDATA|F_SUBDATA; dummy.md_root = mp->mp_pgno; + sub_root = mp; } if (mp != fp) { mp->mp_flags = fp_flags | P_DIRTY; @@ -6637,7 +6639,7 @@ new_sub: * DB are all zero size. */ if (do_sub) { - int xflags; + int xflags, new_dupdata; size_t ecount; put_sub: xdata.mv_size = 0; @@ -6650,28 +6652,32 @@ put_sub: xflags = (flags & MDB_NODUPDATA) ? 
MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL; } + if (sub_root) + mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; + new_dupdata = (int)dkey.mv_size; /* converted, write the original data first */ if (dkey.mv_size) { rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); if (unlikely(rc)) goto bad_sub; - { - /* Adjust other cursors pointing to mp */ - MDB_cursor *m2; - unsigned i = mc->mc_top; - MDB_page *mp = mc->mc_pg[i]; - - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { - if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; - if (!(m2->mc_flags & C_INITIALIZED)) continue; - if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { - mdb_xcursor_init1(m2, leaf); - } - } - } /* we've done our job */ dkey.mv_size = 0; } + if (!(leaf->mn_flags & F_SUBDATA) || sub_root) { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2; + MDB_xcursor *mx = mc->mc_xcursor; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (!(m2->mc_flags & C_INITIALIZED)) continue; + if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { + mdb_xcursor_init2(m2, mx, new_dupdata); + } + } + } ecount = mc->mc_xcursor->mx_db.md_entries; if (flags & MDB_APPENDDUP) xflags |= MDB_APPEND; @@ -7215,6 +7221,38 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) #endif */ } + +/** Fixup a sorted-dups cursor due to underlying update. + * Sets up some fields that depend on the data from the main cursor. + * Almost the same as init1, but skips initialization steps if the + * xcursor had already been used. + * @param[in] mc The main cursor whose sorted-dups cursor is to be fixed up. + * @param[in] src_mx The xcursor of an up-to-date cursor. + * @param[in] new_dupdata True if converting from a non-#F_DUPDATA item. 
+ */ +static void +mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + if (new_dupdata) { + mx->mx_cursor.mc_snum = 1; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags |= C_INITIALIZED; + mx->mx_cursor.mc_ki[0] = 0; + mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */ +#if UINT_MAX < SIZE_MAX + mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; +#endif + } else if (!(mx->mx_cursor.mc_flags & C_INITIALIZED)) { + return; + } + mx->mx_db = src_mx->mx_db; + mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0]; + mdb_debug("Sub-db -%u root page %zu", mx->mx_cursor.mc_dbi, + mx->mx_db.md_root); +} + /** Initialize a cursor for a given transaction and database. */ static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) From ad808146c8bd886f8ce6e6be8d3fb5fbbb5e5ea9 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 12 Oct 2015 00:08:41 +0100 Subject: [PATCH 2/7] lmdb: ITS#7771 fix cursor tracking on fake pages. node_del shifts nodes around, cursors pointing at fake pages need to have their mc_pg[0] corrected. Includes ITS#7771 more for prev commit. 
--- mdb.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/mdb.c b/mdb.c index 09205868..bebe23de 100644 --- a/mdb.c +++ b/mdb.c @@ -6673,8 +6673,14 @@ put_sub: for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; - if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { - mdb_xcursor_init2(m2, mx, new_dupdata); + if (m2->mc_pg[i] == mp) { + if (m2->mc_ki[i] == mc->mc_ki[i]) { + mdb_xcursor_init2(m2, mx, new_dupdata); + } else if (!insert_key) { + MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]); + if (!(n2->mn_flags & F_SUBDATA)) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + } } } } @@ -6772,12 +6778,19 @@ mdb_cursor_del(MDB_cursor *mc, unsigned flags) mdb_node_shrink(mp, mc->mc_ki[mc->mc_top]); leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - /* fix other sub-DB cursors pointed at this fake page */ + /* fix other sub-DB cursors pointed at fake pages on this page */ for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; - if (m2->mc_pg[mc->mc_top] == mp && - m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + if (!(m2->mc_flags & C_INITIALIZED)) continue; + if (m2->mc_pg[mc->mc_top] == mp) { + if (m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) { + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } else { + MDB_node *n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); + if (!(n2->mn_flags & F_SUBDATA)) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + } + } } } mc->mc_db->md_entries--; From 372a6d8521e5c4f4fcd7706e9c3b2e3c834845cc Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Mon, 26 Oct 2015 20:18:19 +0000 Subject: [PATCH 3/7] lmdb: origin - CHANGES, More misc updates. 
Change-Id: I1f86203b89c895ba8caf7916173b23c7e2a211d9 --- CHANGES | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGES b/CHANGES index 94daca60..5b5b3ff9 100644 --- a/CHANGES +++ b/CHANGES @@ -5,8 +5,20 @@ LMDB 0.9.17 Release Engineering Fix ITS#8237 regression from ITS#7589 Fix ITS#8221 MDB_PAGE_FULL on delete/rebalance Fix ITS#8258 rebalance/split assert + Fix ITS#8264 cursor_del cursor tracking + Fix ITS#8263 cursor_put cursor tracking + Fix ITS#7771 fakepage cursor tracking + Fix ITS#7789 ensure mapsize >= pages in use + Fix ITS#7971 mdb_txn_renew0() new reader slots + Fix ITS#7969 use __sync_synchronize on non-x86 + Added mdb_txn_id() (ITS#7994) + Added robust mutex support + Miscellaneous cleanup/simplification Build Create install dirs if needed (ITS#8256) + Fix ThreadProc decl on Win32/MSVC (ITS#8270) + Added ssize_t typedef for MSVC (ITS#8067) + Use ANSI apis on Windows (ITS#8069) LMDB 0.9.16 Release (2015/08/14) Fix cursor EOF bug (ITS#8190) From b430c9a22f27542513a2b5a17eef004d01c7239d Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Fri, 6 Nov 2015 17:26:41 +0300 Subject: [PATCH 4/7] mdbx: Add MDB_USE_ROBUST to control Robust Mutexes. Backported from origin. Change-Id: I416c1d09fb1f290423f29a84831accdaf4436ab0 --- Makefile | 13 +++++-------- mdb.c | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 61f7fc93..09f9040b 100644 --- a/Makefile +++ b/Makefile @@ -1,25 +1,22 @@ -# Makefile for liblmdb (Lightning memory-mapped database library). +# Makefile for libmdbx (lightning memory-mapped database library for Linux). ######################################################################## # Configuration. The compiler options must enable threaded compilation. # -# Preprocessor macros (for CPPFLAGS) of interest... +# Preprocessor macros (for XCFLAGS) of interest... 
# Note that the defaults should already be correct for most # platforms; you should not need to change any of these. # Read their descriptions in mdb.c if you do: # -# - MDB_USE_POSIX_SEM -# - MDB_DSYNC -# - MDB_FDATASYNC -# - MDB_FDATASYNC_WORKS -# - MDB_USE_PWRITEV +# - MDB_USE_ROBUST # # There may be other macros in mdb.c of interest. You should # read mdb.c before changing any of them. # CC ?= gcc +XCFLAGS ?= CFLAGS ?= -O2 -g -Wall -Werror -Wno-unused-parameter -CFLAGS += -pthread +CFLAGS += -pthread $(XCFLAGS) prefix ?= /usr/local ######################################################################## diff --git a/mdb.c b/mdb.c index 010d0942..787fe6de 100644 --- a/mdb.c +++ b/mdb.c @@ -142,6 +142,18 @@ # define mdb_func_ "" #endif +/** Some platforms define the EOWNERDEAD error code + * even though they don't support Robust Mutexes. + * Compile with -DMDB_USE_ROBUST=0. + */ +#ifndef MDB_USE_ROBUST +# if defined(EOWNERDEAD) && defined(PTHREAD_MUTEX_ROBUST) && !defined(ANDROID) +# define MDB_USE_ROBUST 1 +# else +# define MDB_USE_ROBUST 0 +# endif +#endif /* MDB_USE_ROBUST */ + /* Internal error codes, not exposed outside liblmdb */ #define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10) @@ -4627,9 +4639,9 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) if ((rc = pthread_mutexattr_init(&mattr)) || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) -#ifdef EOWNERDEAD +#if MDB_USE_ROBUST || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)) -#endif +#endif /* MDB_USE_ROBUST */ || (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr)) || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) goto fail; @@ -10003,7 +10015,7 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead) static int __cold mdb_mutex_failed(MDB_env *env, pthread_mutex_t *mutex, int rc) { -#ifdef EOWNERDEAD +#if MDB_USE_ROBUST if (unlikely(rc == EOWNERDEAD)) { int rlocked, rc2; @@ -10038,7 +10050,7 @@ mdb_mutex_failed(MDB_env *env, 
pthread_mutex_t *mutex, int rc) pthread_mutex_unlock(mutex); } } -#endif /* EOWNERDEAD */ +#endif /* MDB_USE_ROBUST */ if (unlikely(rc)) { mdb_debug("lock mutex failed, %s", mdb_strerror(rc)); if (rc != EDEADLK) { From f35fb03f86c2cc39f808d19cca6e63688b93ddda Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 4 Nov 2015 17:03:48 +0000 Subject: [PATCH 5/7] mdbx: backport - ITS#8299 fix page_merge cursor fixup. The parent's mc_ki has changed. We need to fix that up as well in other cursors. Change-Id: I1f422a07ed75b99f3e36075749800c892ca6d5ec --- CHANGES | 3 ++- mdb.c | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index 5b5b3ff9..254e3df3 100644 --- a/CHANGES +++ b/CHANGES @@ -5,8 +5,9 @@ LMDB 0.9.17 Release Engineering Fix ITS#8237 regression from ITS#7589 Fix ITS#8221 MDB_PAGE_FULL on delete/rebalance Fix ITS#8258 rebalance/split assert - Fix ITS#8264 cursor_del cursor tracking Fix ITS#8263 cursor_put cursor tracking + Fix ITS#8264 cursor_del cursor tracking + Fix ITS#8299 mdb_del cursor tracking Fix ITS#7771 fakepage cursor tracking Fix ITS#7789 ensure mapsize >= pages in use Fix ITS#7971 mdb_txn_renew0() new reader slots diff --git a/mdb.c b/mdb.c index 787fe6de..5f64aefe 100644 --- a/mdb.c +++ b/mdb.c @@ -7936,6 +7936,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) /* Adjust other cursors pointing to mp */ MDB_cursor *m2, *m3; MDB_dbi dbi = csrc->mc_dbi; + unsigned top = csrc->mc_top; for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { if (csrc->mc_flags & C_SUB) @@ -7944,9 +7945,10 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) m3 = m2; if (m3 == csrc) continue; if (m3->mc_snum < csrc->mc_snum) continue; - if (m3->mc_pg[csrc->mc_top] == psrc) { - m3->mc_pg[csrc->mc_top] = pdst; - m3->mc_ki[csrc->mc_top] += nkeys; + if (m3->mc_pg[top] == psrc) { + m3->mc_pg[top] = pdst; + m3->mc_ki[top] += nkeys; + m3->mc_ki[top-1] = cdst->mc_ki[top-1]; } } } From 7890eb8a8e6e20b3ef507bafcb0df33ee0822a10 
Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 4 Nov 2015 18:11:12 +0000 Subject: [PATCH 6/7] mdbx: backport - ITS#8238 fix DUPFIXED page_split. Parent mc_ki wasn't adjusted if new_indx was > split point Change-Id: I45548e378e53bad5ce7a3a7c2b8236d592f3c412 --- CHANGES | 1 + mdb.c | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGES b/CHANGES index 254e3df3..05976ffa 100644 --- a/CHANGES +++ b/CHANGES @@ -3,6 +3,7 @@ LMDB 0.9 Change Log LMDB 0.9.17 Release Engineering Fix ITS#7377 catch calloc failure Fix ITS#8237 regression from ITS#7589 + Fix ITS#8238 page_split for DUPFIXED pages Fix ITS#8221 MDB_PAGE_FULL on delete/rebalance Fix ITS#8258 rebalance/split assert Fix ITS#8263 cursor_put cursor tracking diff --git a/mdb.c b/mdb.c index 5f64aefe..671e5312 100644 --- a/mdb.c +++ b/mdb.c @@ -8446,6 +8446,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno rp->mp_upper -= ksize - sizeof(indx_t); mc->mc_ki[mc->mc_top] = x; mc->mc_pg[mc->mc_top] = rp; + mc->mc_ki[ptop]++; } } else { int psize, nsize, k; From ee26517b5d909ab37bfddc5b91f59585e2512726 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Wed, 4 Nov 2015 20:38:30 +0000 Subject: [PATCH 7/7] mdbx: backport - ITS#8300 fix rebalance after node_move. ITS#8258, ITS#7829 fixes checked parent index to see if we were moving from a left neighbor. Should have just checked to see if current index was 0, meaning we added on the left. (Parent index may not tell us anything meaningful after a nested rebalance.) Includes: - ITS#8300 fix node_move Don't adjust other cursors when we added a node on the right. - ITS#8300 more for node_move fixups When moving a node from the right neighbor, a different adjustment is needed. - ITS#8300 simplify - ITS#8300 more for node_move When moving a node from one page to another, make sure other cursors' parent index gets adjusted too.
--- CHANGES | 1 + mdb.c | 67 ++++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/CHANGES b/CHANGES index 05976ffa..0b63a13d 100644 --- a/CHANGES +++ b/CHANGES @@ -9,6 +9,7 @@ LMDB 0.9.17 Release Engineering Fix ITS#8263 cursor_put cursor tracking Fix ITS#8264 cursor_del cursor tracking Fix ITS#8299 mdb_del cursor tracking + Fix ITS#8300 mdb_del cursor tracking Fix ITS#7771 fakepage cursor tracking Fix ITS#7789 ensure mapsize >= pages in use Fix ITS#7971 mdb_txn_renew0() new reader slots diff --git a/mdb.c b/mdb.c index 671e5312..7840d16e 100644 --- a/mdb.c +++ b/mdb.c @@ -7738,32 +7738,48 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) /* Adjust other cursors pointing to mp */ MDB_cursor *m2, *m3; MDB_dbi dbi = csrc->mc_dbi; - MDB_page *mp; + MDB_page *mpd, *mps; - mp = cdst->mc_pg[csrc->mc_top]; - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (csrc->mc_flags & C_SUB) - m3 = &m2->mc_xcursor->mx_cursor; - else - m3 = m2; - if (m3 == cdst) continue; - if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] >= - cdst->mc_ki[csrc->mc_top]) { - m3->mc_ki[csrc->mc_top]++; + mps = csrc->mc_pg[csrc->mc_top]; + /* If we're adding on the left, bump others up */ + if (!cdst->mc_ki[csrc->mc_top]) { + mpd = cdst->mc_pg[csrc->mc_top]; + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 != cdst && + m3->mc_pg[csrc->mc_top] == mpd && + m3->mc_ki[csrc->mc_top] >= cdst->mc_ki[csrc->mc_top]) { + m3->mc_ki[csrc->mc_top]++; + } + if (m3 !=csrc && + m3->mc_pg[csrc->mc_top] == mps && + m3->mc_ki[csrc->mc_top] == csrc->mc_ki[csrc->mc_top]) { + m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; + m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; + m3->mc_ki[csrc->mc_top-1]++; + } } - } - - mp = csrc->mc_pg[csrc->mc_top]; - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; 
m2=m2->mc_next) { - if (csrc->mc_flags & C_SUB) - m3 = &m2->mc_xcursor->mx_cursor; - else - m3 = m2; - if (m3 == csrc) continue; - if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == - csrc->mc_ki[csrc->mc_top]) { - m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; - m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; + } else + /* Adding on the right, bump others down */ + { + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == csrc) continue; + if (m3->mc_pg[csrc->mc_top] == mps) { + if (!m3->mc_ki[csrc->mc_top]) { + m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; + m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; + m3->mc_ki[csrc->mc_top-1]--; + } else { + m3->mc_ki[csrc->mc_top]--; + } + } } } } @@ -8151,7 +8167,8 @@ mdb_rebalance(MDB_cursor *mc) */ if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) { rc = mdb_node_move(&mn, mc); - if (mc->mc_ki[mc->mc_top-1]) { + if (!mc->mc_ki[mc->mc_top]) { + /* if we inserted on left, bump position up */ oldki++; } } else {