mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 16:24:12 +08:00
mdbx: Merge branch 'devel'.
This commit is contained in:
commit
8e0f412711
14
Makefile
14
Makefile
@ -198,13 +198,15 @@ bench: bench-lmdb.txt bench-mdbx.txt
|
||||
|
||||
endif
|
||||
|
||||
ci-rule = @( CC=$$(which $1); if [ -n "$$CC" ]; then \
|
||||
ci-rule = ( CC=$$(which $1); if [ -n "$$CC" ]; then \
|
||||
CC=$$(readlink -f $$CC); echo -n "probe by $2 ($$CC): " && \
|
||||
$(MAKE) clean >$1.log 2>$1.err && $(MAKE) all check 1>$1.log 2>$1.err && echo "OK" \
|
||||
$(MAKE) XCFLAGS="-UNDEBUG -DMDB_DEBUG=2" clean >$1.log 2>$1.err && $(MAKE) all check 1>$1.log 2>$1.err && echo "OK" \
|
||||
|| ( echo "FAILED"; cat $1.err >&2; exit 1 ); \
|
||||
else echo "no $2 ($1) for probe"; fi; )
|
||||
ci:
|
||||
$(call ci-rule,cc,default C compiler)
|
||||
$(call ci-rule,gcc,GCC)
|
||||
$(call ci-rule,clang,clang LLVM)
|
||||
$(call ci-rule,icc,Intel C)
|
||||
@if [ "$(CC)" != "gcc" ]; then \
|
||||
$(call ci-rule,$(CC),default C compiler); \
|
||||
fi
|
||||
@$(call ci-rule,gcc,GCC)
|
||||
@$(call ci-rule,clang,clang LLVM)
|
||||
@$(call ci-rule,icc,Intel C)
|
||||
|
92
mdb.c
92
mdb.c
@ -532,9 +532,23 @@ typedef struct MDB_txninfo {
|
||||
+ (1 /* MDB_PIDLOCK */ << 16)))
|
||||
/** @} */
|
||||
|
||||
/** Common header for all page types.
|
||||
* Overflow records occupy a number of contiguous pages with no
|
||||
* headers on any page after the first.
|
||||
/** Common header for all page types. The page type depends on #mp_flags.
|
||||
*
|
||||
* #P_BRANCH and #P_LEAF pages have unsorted '#MDB_node's at the end, with
|
||||
* sorted #mp_ptrs[] entries referring to them. Exception: #P_LEAF2 pages
|
||||
* omit mp_ptrs and pack sorted #MDB_DUPFIXED values after the page header.
|
||||
*
|
||||
* #P_OVERFLOW records occupy one or more contiguous pages where only the
|
||||
* first has a page header. They hold the real data of #F_BIGDATA nodes.
|
||||
*
|
||||
* #P_SUBP sub-pages are small leaf "pages" with duplicate data.
|
||||
* A node with flag #F_DUPDATA but not #F_SUBDATA contains a sub-page.
|
||||
* (Duplicate data can also go in sub-databases, which use normal pages.)
|
||||
*
|
||||
* #P_META pages contain #MDB_meta, the start point of an LMDB snapshot.
|
||||
*
|
||||
* Each non-metapage up to #MDB_meta.%mm_last_pg is reachable exactly once
|
||||
* in the snapshot: Either used by a database or listed in a freeDB record.
|
||||
*/
|
||||
typedef struct MDB_page {
|
||||
#define mp_pgno mp_p.p_pgno
|
||||
@ -543,7 +557,7 @@ typedef struct MDB_page {
|
||||
pgno_t p_pgno; /**< page number */
|
||||
struct MDB_page *p_next; /**< for in-memory list of freed pages */
|
||||
} mp_p;
|
||||
uint16_t mp_ksize;
|
||||
uint16_t mp_leaf2_ksize; /**< key size if this is a LEAF2 page */
|
||||
/** @defgroup mdb_page Page Flags
|
||||
* @ingroup internal
|
||||
* Flags for the page headers.
|
||||
@ -610,7 +624,9 @@ typedef struct MDB_page {
|
||||
/** The number of overflow pages needed to store the given size. */
|
||||
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
|
||||
|
||||
/** Link in #MDB_txn.%mt_loose_pgs list */
|
||||
/** Link in #MDB_txn.%mt_loose_pgs list.
|
||||
* Kept outside the page header, which is needed when reusing the page.
|
||||
*/
|
||||
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2))
|
||||
|
||||
/** Header for a single key/data pair within a page.
|
||||
@ -737,9 +753,9 @@ typedef struct MDB_db {
|
||||
pgno_t md_root; /**< the root page of this tree */
|
||||
} MDB_db;
|
||||
|
||||
/** mdb_dbi_open flags */
|
||||
#define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */
|
||||
#define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID))
|
||||
/** #mdb_dbi_open() flags */
|
||||
#define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\
|
||||
MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE)
|
||||
|
||||
@ -770,7 +786,10 @@ typedef struct MDB_meta {
|
||||
#define mm_psize mm_dbs[FREE_DBI].md_xsize
|
||||
/** Any persistent environment flags. @ref mdb_env */
|
||||
#define mm_flags mm_dbs[FREE_DBI].md_flags
|
||||
pgno_t mm_last_pg; /**< last used page in file */
|
||||
/** Last used page in the datafile.
|
||||
* Actually the file may be shorter if the freeDB lists the final pages.
|
||||
*/
|
||||
pgno_t mm_last_pg;
|
||||
volatile txnid_t mm_txnid; /**< txnid that committed this page */
|
||||
#define MDB_DATASIGN_NONE 0
|
||||
#define MDB_DATASIGN_WEAK 1
|
||||
@ -829,7 +848,7 @@ struct MDB_txn {
|
||||
* in this transaction, linked through #NEXT_LOOSE_PAGE(page).
|
||||
*/
|
||||
MDB_page *mt_loose_pgs;
|
||||
/* #Number of loose pages (#mt_loose_pgs) */
|
||||
/** Number of loose pages (#mt_loose_pgs) */
|
||||
int mt_loose_count;
|
||||
/** The sorted list of dirty pages we temporarily wrote to disk
|
||||
* because the dirty list was full. page numbers in here are
|
||||
@ -1232,7 +1251,11 @@ static txnid_t mdbx_oomkick(MDB_env *env, txnid_t oldest);
|
||||
(type & (MDBX_DBG_TRACE | MDBX_DBG_EXTRA)))
|
||||
|
||||
#else
|
||||
# define mdb_debug_enabled(type) (0)
|
||||
# ifndef NDEBUG
|
||||
# define mdb_debug_enabled(type) (1)
|
||||
# else
|
||||
# define mdb_debug_enabled(type) (0)
|
||||
# endif
|
||||
# define mdb_audit_enabled() (0)
|
||||
# define mdb_assert_enabled() (0)
|
||||
# define mdb_assert_fail(env, msg, func, line) \
|
||||
@ -1386,7 +1409,7 @@ mdb_page_list(MDB_page *mp)
|
||||
|
||||
for (i=0; i<nkeys; i++) {
|
||||
if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
|
||||
key.mv_size = nsize = mp->mp_ksize;
|
||||
key.mv_size = nsize = mp->mp_leaf2_ksize;
|
||||
key.mv_data = LEAF2KEY(mp, i, nsize);
|
||||
total += nsize;
|
||||
mdb_print("key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
|
||||
@ -2347,7 +2370,7 @@ done:
|
||||
VALGRIND_MAKE_MEM_UNDEFINED(np, env->me_psize * num);
|
||||
|
||||
np->mp_pgno = pgno;
|
||||
np->mp_ksize = 0;
|
||||
np->mp_leaf2_ksize = 0;
|
||||
np->mp_flags = 0;
|
||||
np->mp_pages = num;
|
||||
mdb_page_dirty(txn, np);
|
||||
@ -4084,18 +4107,12 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
|
||||
int rc;
|
||||
MDB_meta* head = mdb_meta_head_w(env);
|
||||
size_t prev_mapsize = head->mm_mapsize;
|
||||
volatile MDB_meta* target = META_IS_WEAK(head) ? head : mdb_env_meta_flipflop(env, head);
|
||||
off_t offset = (char*) target - env->me_map;
|
||||
size_t used_size = env->me_psize * (pending->mm_last_pg + 1);
|
||||
|
||||
mdb_assert(env, pending != METAPAGE_1(env) && pending != METAPAGE_2(env));
|
||||
mdb_assert(env, (env->me_flags & (MDB_RDONLY | MDB_FATAL_ERROR)) == 0);
|
||||
mdb_assert(env, META_IS_WEAK(head) || env->me_sync_pending != 0
|
||||
|| env->me_mapsize != prev_mapsize);
|
||||
mdb_assert(env, pending->mm_txnid > head->mm_txnid || META_IS_WEAK(head));
|
||||
mdb_assert(env, pending->mm_txnid > target->mm_txnid || META_IS_WEAK(target));
|
||||
|
||||
MDB_meta* stay = mdb_env_meta_flipflop(env, (MDB_meta*) target);
|
||||
mdb_assert(env, pending->mm_txnid > stay->mm_txnid);
|
||||
|
||||
pending->mm_mapsize = env->me_mapsize;
|
||||
mdb_assert(env, pending->mm_mapsize >= used_size);
|
||||
@ -4117,6 +4134,7 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
|
||||
int mode = (flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
|
||||
if (unlikely(msync(env->me_map, used_size, mode))) {
|
||||
rc = errno;
|
||||
/* LY: msync() should never return EINTR */
|
||||
goto fail;
|
||||
}
|
||||
if ((flags & MDB_MAPASYNC) == 0)
|
||||
@ -4139,7 +4157,7 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
|
||||
while(unlikely(flush(env->me_fd) < 0)) {
|
||||
rc = errno;
|
||||
if (rc != EINTR)
|
||||
goto undo;
|
||||
goto fail;
|
||||
}
|
||||
env->me_sync_pending = 0;
|
||||
}
|
||||
@ -4150,12 +4168,22 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
|
||||
pending->mm_datasync_sign = mdb_meta_sign(pending);
|
||||
} else {
|
||||
pending->mm_datasync_sign =
|
||||
(flags & MDBX_UTTERLY_NOSYNC) == MDBX_UTTERLY_NOSYNC
|
||||
? MDB_DATASIGN_NONE : MDB_DATASIGN_WEAK;
|
||||
(flags & MDBX_UTTERLY_NOSYNC) == MDBX_UTTERLY_NOSYNC
|
||||
? MDB_DATASIGN_NONE : MDB_DATASIGN_WEAK;
|
||||
}
|
||||
mdb_debug("writing meta %d, root %zu, txn_id %zu, %s",
|
||||
offset >= env->me_psize, pending->mm_dbs[MAIN_DBI].md_root,
|
||||
pending->mm_txnid,
|
||||
|
||||
volatile MDB_meta* target = (pending->mm_txnid == head->mm_txnid || META_IS_WEAK(head))
|
||||
? head : mdb_env_meta_flipflop(env, head);
|
||||
off_t offset = (char*) target - env->me_map;
|
||||
|
||||
MDB_meta* stay = mdb_env_meta_flipflop(env, (MDB_meta*) target);
|
||||
mdb_debug("writing meta %d (%s, was %zu/%s, stay %s %zu/%s), root %zu, txn_id %zu, %s",
|
||||
offset >= env->me_psize,
|
||||
target == head ? "head" : "tail", target->mm_txnid,
|
||||
META_IS_WEAK(target) ? "Weak" : META_IS_STEADY(target) ? "Steady" : "Legacy",
|
||||
stay == head ? "head" : "tail", stay->mm_txnid,
|
||||
META_IS_WEAK(stay) ? "Weak" : META_IS_STEADY(stay) ? "Steady" : "Legacy",
|
||||
pending->mm_dbs[MAIN_DBI].md_root, pending->mm_txnid,
|
||||
META_IS_WEAK(pending) ? "Weak" : META_IS_STEADY(pending) ? "Steady" : "Legacy" );
|
||||
|
||||
if (env->me_flags & MDB_WRITEMAP) {
|
||||
@ -6642,7 +6670,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
||||
*/
|
||||
fp_flags = P_LEAF|P_DIRTY;
|
||||
fp = env->me_pbuf;
|
||||
fp->mp_ksize = data->mv_size; /* used if MDB_DUPFIXED */
|
||||
fp->mp_leaf2_ksize = data->mv_size; /* used if MDB_DUPFIXED */
|
||||
fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE);
|
||||
olddata.mv_size = PAGEHDRSZ;
|
||||
goto prep_subDB;
|
||||
@ -6719,7 +6747,7 @@ more:
|
||||
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
|
||||
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
||||
fp->mp_flags |= P_LEAF2;
|
||||
fp->mp_ksize = data->mv_size;
|
||||
fp->mp_leaf2_ksize = data->mv_size;
|
||||
xdata.mv_size += 2 * data->mv_size; /* leave space for 2 more */
|
||||
} else {
|
||||
xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) +
|
||||
@ -6741,7 +6769,7 @@ more:
|
||||
data->mv_size);
|
||||
break;
|
||||
}
|
||||
offset = fp->mp_ksize;
|
||||
offset = fp->mp_leaf2_ksize;
|
||||
if (SIZELEFT(fp) < offset) {
|
||||
offset *= 4; /* space for 4 more */
|
||||
break;
|
||||
@ -6764,7 +6792,7 @@ more:
|
||||
prep_subDB:
|
||||
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
||||
fp_flags |= P_LEAF2;
|
||||
dummy.md_xsize = fp->mp_ksize;
|
||||
dummy.md_xsize = fp->mp_leaf2_ksize;
|
||||
dummy.md_flags = MDB_DUPFIXED;
|
||||
if (mc->mc_db->md_flags & MDB_INTEGERDUP)
|
||||
dummy.md_flags |= MDB_INTEGERKEY;
|
||||
@ -6788,11 +6816,11 @@ prep_subDB:
|
||||
}
|
||||
if (mp != fp) {
|
||||
mp->mp_flags = fp_flags | P_DIRTY;
|
||||
mp->mp_ksize = fp->mp_ksize;
|
||||
mp->mp_leaf2_ksize = fp->mp_leaf2_ksize;
|
||||
mp->mp_lower = fp->mp_lower;
|
||||
mp->mp_upper = fp->mp_upper + offset;
|
||||
if (fp_flags & P_LEAF2) {
|
||||
memcpy(PAGEDATA(mp), PAGEDATA(fp), NUMKEYS(fp) * fp->mp_ksize);
|
||||
memcpy(PAGEDATA(mp), PAGEDATA(fp), NUMKEYS(fp) * fp->mp_leaf2_ksize);
|
||||
} else {
|
||||
memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE,
|
||||
olddata.mv_size - fp->mp_upper - PAGEBASE);
|
||||
@ -7537,7 +7565,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
|
||||
mx->mx_cursor.mc_ki[0] = 0;
|
||||
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
||||
mx->mx_db.md_flags = MDB_DUPFIXED;
|
||||
mx->mx_db.md_xsize = fp->mp_ksize;
|
||||
mx->mx_db.md_xsize = fp->mp_leaf2_ksize;
|
||||
if (mc->mc_db->md_flags & MDB_INTEGERDUP)
|
||||
mx->mx_db.md_flags |= MDB_INTEGERKEY;
|
||||
}
|
||||
@ -8621,7 +8649,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
||||
/* Create a right sibling. */
|
||||
if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp)))
|
||||
return rc;
|
||||
rp->mp_ksize = mp->mp_ksize;
|
||||
rp->mp_leaf2_ksize = mp->mp_leaf2_ksize;
|
||||
mdb_debug("new right sibling: page %zu", rp->mp_pgno);
|
||||
|
||||
/* Usually when splitting the root page, the cursor
|
||||
|
Loading…
x
Reference in New Issue
Block a user