From 17c6555a7fef4eb8711e826bf3401b8b9b6e93cf Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Mon, 21 Nov 2016 20:50:39 +0300 Subject: [PATCH 01/13] mdbx: add 'canary' support for libfpta. Change-Id: I62c68f149adf38d65aa9371a1fb3adac405d23ed --- mdb.c | 22 +++++++++++++++++++++- mdbx.c | 32 ++++++++++++++++++++++++++++++++ mdbx.h | 8 ++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/mdb.c b/mdb.c index edf088ec..45fcc5e6 100644 --- a/mdb.c +++ b/mdb.c @@ -778,6 +778,10 @@ typedef struct MDB_meta { volatile uint64_t mm_datasync_sign; #define META_IS_WEAK(meta) ((meta)->mm_datasync_sign == MDB_DATASIGN_WEAK) #define META_IS_STEADY(meta) ((meta)->mm_datasync_sign > MDB_DATASIGN_WEAK) + +#if MDBX_MODE_ENABLED + volatile mdbx_canary mm_canary; +#endif } MDB_meta; /** Buffer for a stack-allocated meta page. @@ -809,7 +813,7 @@ typedef struct MDB_dbx { * Every operation requires a transaction handle. */ struct MDB_txn { -#define MDBX_MT_SIGNATURE 0x706C553B +#define MDBX_MT_SIGNATURE 0x93D53A31 unsigned mt_signature; MDB_txn *mt_parent; /**< parent of a nested txn */ /** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */ @@ -895,6 +899,10 @@ struct MDB_txn { * dirty_list into mt_parent after freeing hidden mt_parent pages. */ unsigned mt_dirty_room; + +#if MDBX_MODE_ENABLED + mdbx_canary mt_canary; +#endif }; /** Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. @@ -2842,6 +2850,9 @@ mdb_txn_renew0(MDB_txn *txn, unsigned flags) txn->mt_next_pgno = meta->mm_last_pg+1; /* Copy the DB info and flags */ memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDB_db)); +#if MDBX_MODE_ENABLED + txn->mt_canary = meta->mm_canary; +#endif break; } } @@ -2858,6 +2869,9 @@ mdb_txn_renew0(MDB_txn *txn, unsigned flags) pthread_mutex_lock(&tsan_mutex); #endif MDB_meta *meta = mdb_meta_head_w(env); +#if MDBX_MODE_ENABLED + txn->mt_canary = meta->mm_canary; +#endif txn->mt_txnid = meta->mm_txnid + 1; txn->mt_flags = flags; #ifdef __SANITIZE_THREAD__ @@ -3919,6 +3933,9 @@ mdb_txn_commit(MDB_txn *txn) meta.mm_dbs[MAIN_DBI] = txn->mt_dbs[MAIN_DBI]; meta.mm_last_pg = txn->mt_next_pgno - 1; meta.mm_txnid = txn->mt_txnid; +#if MDBX_MODE_ENABLED + meta.mm_canary = txn->mt_canary; +#endif rc = mdb_env_sync0(env, env->me_flags | txn->mt_flags, &meta); } @@ -4155,6 +4172,9 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending) target->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI]; target->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI]; target->mm_last_pg = pending->mm_last_pg; +#if MDBX_MODE_ENABLED + target->mm_canary = pending->mm_canary; +#endif /* LY: 'commit' the meta */ target->mm_txnid = pending->mm_txnid; target->mm_datasync_sign = pending->mm_datasync_sign; diff --git a/mdbx.c b/mdbx.c index fb4aac85..cf5187fe 100644 --- a/mdbx.c +++ b/mdbx.c @@ -320,3 +320,35 @@ mdbx_env_pgwalk(MDB_txn *txn, MDBX_pgvisitor_func* visitor, void* user) rc = visitor(P_INVALID, 0, user, NULL, NULL, 0, 0, 0, 0); return rc; } + +int mdbx_canary_put(MDB_txn *txn, const mdbx_canary* canary) +{ + if (unlikely(!txn)) + return EINVAL; + + if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDB_VERSION_MISMATCH; + + if (unlikely(F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))) + return EACCES; + + if (likely(canary)) { + txn->mt_canary.x = canary->x; + txn->mt_canary.y = canary->y; + txn->mt_canary.z = canary->z; + } + txn->mt_canary.v = txn->mt_txnid; + + return MDB_SUCCESS; +} + +size_t mdbx_canary_get(MDB_txn *txn, mdbx_canary* canary) +{ + if(unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) + return 0; + + if (likely(canary)) + *canary = txn->mt_canary; + + return txn->mt_txnid; +} diff --git a/mdbx.h b/mdbx.h index bcbd4f47..d494ef36 100644 --- a/mdbx.h +++ b/mdbx.h @@ -211,6 +211,14 @@ typedef int MDBX_pgvisitor_func(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi, const char *type, int nentries, int payload_bytes, int header_bytes, int unused_bytes); int mdbx_env_pgwalk(MDB_txn *txn, MDBX_pgvisitor_func* visitor, void* ctx); + +typedef struct mdbx_canary { + size_t x, y, z, v; +} mdbx_canary; + +int mdbx_canary_put(MDB_txn *txn, const mdbx_canary* canary); +size_t mdbx_canary_get(MDB_txn *txn, mdbx_canary* canary); + /** @} */ #ifdef __cplusplus From f9f132671cc245cfe39519e5dbc9d5044b8bd973 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Tue, 6 Dec 2016 20:08:08 +0300 Subject: [PATCH 02/13] mdbx: explicit overwrite support for mdbx_put(). --- lmdb.h | 3 ++- mdb.c | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/lmdb.h b/lmdb.h index 2d2aabb7..1237fdcd 100644 --- a/lmdb.h +++ b/lmdb.h @@ -349,7 +349,8 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel * For mdb_cursor_del: remove all duplicate data items. */ #define MDB_NODUPDATA 0x20 -/** For mdb_cursor_put: overwrite the current key/data pair */ +/** For mdb_cursor_put: overwrite the current key/data pair + * MDBX allows this flag for mdb_put() for explicit overwrite/update without insertion. */ #define MDB_CURRENT 0x40 /** For put: Just reserve space for data, don't copy it. Return a * pointer to the reserved space. diff --git a/mdb.c b/mdb.c index 5e1dcffc..21933378 100644 --- a/mdb.c +++ b/mdb.c @@ -9032,7 +9032,6 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, { MDB_cursor mc; MDB_xcursor mx; - int rc; if (unlikely(!key || !data || !txn)) return EINVAL; @@ -9043,17 +9042,33 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) return EINVAL; - if (unlikely(flags & ~(MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP))) + if (unlikely(flags & ~(MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP + /* LY: MDB_CURRENT indicates explicit overwrite (update) for MDBX */ + | (MDBX_MODE_ENABLED ? MDB_CURRENT : 0)))) return EINVAL; if (unlikely(txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))) return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; +#if MDBX_MODE_ENABLED + /* LY: allows update (explicit overwrite) only for unique keys */ + if ((flags & MDB_CURRENT) && (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)) + return EINVAL; +#endif /* MDBX_MODE_ENABLED */ + mdb_cursor_init(&mc, txn, dbi, &mx); mc.mc_next = txn->mt_cursors[dbi]; txn->mt_cursors[dbi] = &mc; - rc = mdb_cursor_put(&mc, key, data, flags); + int rc = MDB_SUCCESS; +#if MDBX_MODE_ENABLED + /* LY: support for update (explicit overwrite) */ + if (flags & MDB_CURRENT) + rc = mdb_cursor_get(&mc, key, NULL, MDB_SET); +#endif /* MDBX_MODE_ENABLED */ + if (likely(rc == MDB_SUCCESS)) + rc = mdb_cursor_put(&mc, key, data, flags); txn->mt_cursors[dbi] = mc.mc_next; + return rc; } From b980878b0727396fa9c9812c1d879059743a5288 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Tue, 6 Dec 2016 20:32:09 +0300 Subject: [PATCH 03/13] mdbx: adds mdbx_cursor_eof() for libfpta. --- mdbx.c | 11 +++++++++++ mdbx.h | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/mdbx.c b/mdbx.c index cf5187fe..d7001778 100644 --- a/mdbx.c +++ b/mdbx.c @@ -352,3 +352,14 @@ size_t mdbx_canary_get(MDB_txn *txn, mdbx_canary* canary) return txn->mt_txnid; } + +int mdbx_cursor_eof(MDB_cursor *mc) +{ + if (unlikely(mc == NULL)) + return EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) + return MDB_VERSION_MISMATCH; + + return (mc->mc_flags & (C_INITIALIZED | C_EOF)) != C_INITIALIZED ? 1 : 0; +} diff --git a/mdbx.h b/mdbx.h index d494ef36..8a5c238e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -219,6 +219,10 @@ typedef struct mdbx_canary { int mdbx_canary_put(MDB_txn *txn, const mdbx_canary* canary); size_t mdbx_canary_get(MDB_txn *txn, mdbx_canary* canary); +/** Returns 1 when no more data available or cursor not positioned, + * 0 otherwise or less that zero in error case. */ +int mdbx_cursor_eof(MDB_cursor *mc); + /** @} */ #ifdef __cplusplus From c4f4d9ebf3f9690ff85151f66f2497b69b3da00a Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Thu, 8 Dec 2016 16:33:17 +0300 Subject: [PATCH 04/13] mdbx: rethink mdbx_cursor_eof() for libfpta. --- mdbx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.c b/mdbx.c index d7001778..64c57683 100644 --- a/mdbx.c +++ b/mdbx.c @@ -361,5 +361,5 @@ int mdbx_cursor_eof(MDB_cursor *mc) if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) return MDB_VERSION_MISMATCH; - return (mc->mc_flags & (C_INITIALIZED | C_EOF)) != C_INITIALIZED ? 1 : 0; + return (mc->mc_flags & C_INITIALIZED) ? 0 : 1; } From 5865c74876578f55cf6bee9f94b2782b36655c49 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Thu, 15 Dec 2016 21:55:28 +0300 Subject: [PATCH 05/13] mdbx: rework overwrite support for mdbx_put(). --- mdb.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mdb.c b/mdb.c index ab72b9e6..16a24bbd 100644 --- a/mdb.c +++ b/mdb.c @@ -9042,20 +9042,23 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, if (unlikely(txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))) return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; -#if MDBX_MODE_ENABLED - /* LY: allows update (explicit overwrite) only for unique keys */ - if ((flags & MDB_CURRENT) && (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)) - return EINVAL; -#endif /* MDBX_MODE_ENABLED */ - mdb_cursor_init(&mc, txn, dbi, &mx); mc.mc_next = txn->mt_cursors[dbi]; txn->mt_cursors[dbi] = &mc; int rc = MDB_SUCCESS; #if MDBX_MODE_ENABLED /* LY: support for update (explicit overwrite) */ - if (flags & MDB_CURRENT) + if (flags & MDB_CURRENT) { rc = mdb_cursor_get(&mc, key, NULL, MDB_SET); + if (likely(rc == MDB_SUCCESS) && (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)) { + /* LY: allows update (explicit overwrite) only for unique keys */ + MDB_node *leaf = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_tassert(txn, XCURSOR_INITED(&mc) && mc.mc_xcursor->mx_db.md_entries > 1); + rc = MDB_KEYEXIST; + } + } + } #endif /* MDBX_MODE_ENABLED */ if (likely(rc == MDB_SUCCESS)) rc = mdb_cursor_put(&mc, key, data, flags); From 2956095c6ded72d22e10e6d1cad4a5410ea52994 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Thu, 15 Dec 2016 21:56:45 +0300 Subject: [PATCH 06/13] mdbx: rework MDB_CURRENT handling for libfpta. --- mdb.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mdb.c b/mdb.c index 447a618d..411f5ca2 100644 --- a/mdb.c +++ b/mdb.c @@ -6185,7 +6185,6 @@ set1: rc = 0; } *data = olddata; - } else { if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); @@ -6587,7 +6586,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, dkey.mv_size = 0; - if (flags == MDB_CURRENT) { + if (flags & MDB_CURRENT) { if (unlikely(!(mc->mc_flags & C_INITIALIZED))) return EINVAL; rc = MDB_SUCCESS; @@ -6778,6 +6777,7 @@ more: break; } /* FALLTHRU: Big enough MDB_DUPFIXED sub-page */ + case MDB_CURRENT | MDB_NODUPDATA: case MDB_CURRENT: fp->mp_flags |= P_DIRTY; COPY_PGNO(fp->mp_pgno, mp->mp_pgno); @@ -6975,12 +6975,15 @@ put_sub: xdata.mv_size = 0; xdata.mv_data = ""; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + xflags = MDB_NOSPILL; + if (flags & MDB_NODUPDATA) + xflags |= MDB_NOOVERWRITE; + if (flags & MDB_APPENDDUP) + xflags |= MDB_APPEND; if (flags & MDB_CURRENT) { - xflags = MDB_CURRENT|MDB_NOSPILL; + xflags |= MDB_CURRENT; } else { mdb_xcursor_init1(mc, leaf); - xflags = (flags & MDB_NODUPDATA) ? - MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL; } if (sub_root) mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; @@ -7014,8 +7017,6 @@ put_sub: } } ecount = mc->mc_xcursor->mx_db.md_entries; - if (flags & MDB_APPENDDUP) - xflags |= MDB_APPEND; rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags); if (flags & F_SUBDATA) { void *db = NODEDATA(leaf); From 578fe9e2586cb7dbaa38a79ab1943388581782d8 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sun, 18 Dec 2016 00:58:26 +0300 Subject: [PATCH 07/13] mdbx: allows zero-length keys for libfpta. --- mdb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mdb.c b/mdb.c index 411f5ca2..83cd994c 100644 --- a/mdb.c +++ b/mdb.c @@ -6011,9 +6011,6 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_node *leaf = NULL; DKBUF; - if (unlikely(key->mv_size == 0)) - return MDB_BAD_VALSIZE; - if ( (mc->mc_db->md_flags & MDB_INTEGERKEY) && unlikely( key->mv_size != sizeof(unsigned) && key->mv_size != sizeof(size_t) )) { @@ -6556,7 +6553,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (unlikely(mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))) return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; - if (unlikely(key->mv_size-1 >= ENV_MAXKEY(env))) + if (unlikely(key->mv_size > ENV_MAXKEY(env))) return MDB_BAD_VALSIZE; #if SIZE_MAX > MAXDATASIZE From efcf60dfaa0ed33ecab5bd5ccfb93906dd96862c Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sun, 18 Dec 2016 20:15:27 +0300 Subject: [PATCH 08/13] mdbx: fix MDB_GET_CURRENT for dupsort's subcursor. --- mdb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mdb.c b/mdb.c index 83cd994c..1f4cf026 100644 --- a/mdb.c +++ b/mdb.c @@ -6326,6 +6326,12 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_GET_KEY(leaf, key); if (data) { if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (unlikely(!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (unlikely(rc)) + break; + } rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); } else { rc = mdb_node_read(mc, leaf, data); From 7e682540510d5fb140fe42e4824a5587f0a3ec62 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Wed, 21 Dec 2016 20:29:19 +0300 Subject: [PATCH 09/13] mdbx: mdb_chk - don't close dbi-handles, set_maxdbs() instead. --- mdb_chk.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mdb_chk.c b/mdb_chk.c index b86b8f96..fd485fe1 100644 --- a/mdb_chk.c +++ b/mdb_chk.c @@ -432,7 +432,6 @@ static int process_db(MDB_dbi dbi, char *name, visitor *handler, int silent) fflush(NULL); } skipped_subdb++; - mdbx_dbi_close(env, dbi); return MDB_SUCCESS; } @@ -444,14 +443,12 @@ static int process_db(MDB_dbi dbi, char *name, visitor *handler, int silent) rc = mdbx_dbi_flags(txn, dbi, &flags); if (rc) { error(" - mdbx_dbi_flags failed, error %d %s\n", rc, mdbx_strerror(rc)); - mdbx_dbi_close(env, dbi); return rc; } rc = mdbx_stat(txn, dbi, &ms, sizeof(ms)); if (rc) { error(" - mdbx_stat failed, error %d %s\n", rc, mdbx_strerror(rc)); - mdbx_dbi_close(env, dbi); return rc; } @@ -475,7 +472,6 @@ static int process_db(MDB_dbi dbi, char *name, visitor *handler, int silent) rc = mdbx_cursor_open(txn, dbi, &mc); if (rc) { error(" - mdbx_cursor_open failed, error %d %s\n", rc, mdbx_strerror(rc)); - mdbx_dbi_close(env, dbi); return rc; } @@ -565,7 +561,6 @@ bailout: } mdbx_cursor_close(mc); - mdbx_dbi_close(env, dbi); return rc || problems_count; } @@ -686,7 +681,11 @@ int main(int argc, char *argv[]) } maxkeysize = rc; - mdbx_env_set_maxdbs(env, 3); + rc = mdbx_env_set_maxdbs(env, MAX_DBI); + if (rc < 0) { + error("mdbx_env_set_maxdbs failed, error %d %s\n", rc, mdbx_strerror(rc)); + goto bailout; + } rc = mdbx_env_open_ex(env, envname, envflags, 0664, &exclusive); if (rc) { From 91bb3ab9fa1068bfbdb61455c9045b105f36095e Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Wed, 21 Dec 2016 20:31:46 +0300 Subject: [PATCH 10/13] mdbx: mdb_chk - cosmetics (no extra \n). --- mdb_chk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdb_chk.c b/mdb_chk.c index fd485fe1..b61c06b7 100644 --- a/mdb_chk.c +++ b/mdb_chk.c @@ -746,7 +746,7 @@ int main(int argc, char *argv[]) meta_lt(info.me_meta1_txnid, info.me_meta1_sign, info.me_meta2_txnid, info.me_meta2_sign) ? "tail" : "head"); if (info.me_meta1_txnid > info.base.me_last_txnid) - print(", rolled-back %zu (%zu >>> %zu)\n", + print(", rolled-back %zu (%zu >>> %zu)", info.me_meta1_txnid - info.base.me_last_txnid, info.me_meta1_txnid, info.base.me_last_txnid); print("\n"); @@ -756,7 +756,7 @@ int main(int argc, char *argv[]) meta_lt(info.me_meta2_txnid, info.me_meta2_sign, info.me_meta1_txnid, info.me_meta1_sign) ? "tail" : "head"); if (info.me_meta2_txnid > info.base.me_last_txnid) - print(", rolled-back %zu (%zu >>> %zu)\n", + print(", rolled-back %zu (%zu >>> %zu)", info.me_meta2_txnid - info.base.me_last_txnid, info.me_meta2_txnid, info.base.me_last_txnid); print("\n"); From 9b38d8d422c79db54f4ba39685c956b44fa1a2da Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Wed, 21 Dec 2016 20:32:27 +0300 Subject: [PATCH 11/13] mdbx: adds mdbx_replace() for libfpta. --- mdbx.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mdbx.h | 3 ++ 2 files changed, 132 insertions(+) diff --git a/mdbx.c b/mdbx.c index ce930336..781450a5 100644 --- a/mdbx.c +++ b/mdbx.c @@ -368,3 +368,132 @@ int mdbx_cursor_eof(MDB_cursor *mc) return (mc->mc_flags & C_INITIALIZED) ? 0 : 1; } + +static int mdbx_is_samedata(MDB_val* a, MDB_val* b) { + return a->iov_len == b->iov_len + && memcmp(a->iov_base, b->iov_base, a->iov_len) == 0; +} + +/* Позволяет обновить или удалить существующую запись с получением + * в old_data предыдущего значения данных. При этом если new_data равен + * нулю, то выполняется удаление, иначе обновление/вставка. + * + * Текущее значение может находиться в уже измененной (грязной) странице. + * В этом случае страница будет перезаписана при обновлении, а само старое + * значение утрачено. Поэтому исходно в old_data должен быть передан + * дополнительный буфер для копирования старого значения. + * Если переданный буфер слишком мал, то функция вернет -1, установив + * old_data->iov_len в соответствующее значение. + * + * Для не-уникальных ключей также возможен второй сценарий использования, + * когда посредством old_data из записей с одинаковым ключом для + * удаления/обновления выбирается конкретная. Для выбора этого сценария + * во flags следует одновременно указать MDB_CURRENT и MDB_NOOVERWRITE. + * + * Функция может быть замещена соответствующими операциями с курсорами + * после двух доработок (TODO): + * - внешняя аллокация курсоров, в том числе на стеке (без malloc). + * - получения статуса страницы по адресу (знать о P_DIRTY). + */ +int mdbx_replace(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *new_data, MDB_val *old_data, unsigned flags) +{ + MDB_cursor mc; + MDB_xcursor mx; + + if (unlikely(!key || !old_data || !txn)) + return EINVAL; + + if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDB_VERSION_MISMATCH; + + if (unlikely(old_data->iov_base == NULL && old_data->iov_len)) + return EINVAL; + + if (unlikely(new_data == NULL && !(flags & MDB_CURRENT))) + return EINVAL; + + if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) + return EINVAL; + + if (unlikely(flags & ~(MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP|MDB_CURRENT))) + return EINVAL; + + if (unlikely(txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))) + return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN; + + mdb_cursor_init(&mc, txn, dbi, &mx); + mc.mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = &mc; + + int rc; + MDB_val present_key = *key; + if (F_ISSET(flags, MDB_CURRENT | MDB_NOOVERWRITE) + && (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)) { + /* в old_data значение для выбора конкретного дубликата */ + rc = mdbx_cursor_get(&mc, &present_key, old_data, MDB_GET_BOTH); + if (rc != MDB_SUCCESS) + goto bailout; + /* если данные совпадают, то ничего делать не надо */ + if (new_data && mdbx_is_samedata(old_data, new_data)) + goto bailout; + } else { + /* в old_data буфер получения предыдущего значения */ + MDB_val present_data; + rc = mdbx_cursor_get(&mc, &present_key, &present_data, MDB_SET_KEY); + if (unlikely(rc != MDB_SUCCESS)) { + old_data->iov_base = NULL; + old_data->iov_len = rc; + if (rc != MDB_NOTFOUND || (flags & MDB_CURRENT)) + goto bailout; + } else if (flags & MDB_NOOVERWRITE) { + rc = MDB_KEYEXIST; + *old_data = present_data; + goto bailout; + } else { + MDB_page *page = mc.mc_pg[mc.mc_top]; + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { + if (flags & MDB_CURRENT) { + /* для не-уникальных ключей позволяем update/delete только если ключ один */ + MDB_node *leaf = NODEPTR(page, mc.mc_ki[mc.mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_tassert(txn, XCURSOR_INITED(&mc) && mc.mc_xcursor->mx_db.md_entries > 1); + rc = MDB_KEYEXIST; + goto bailout; + } + /* если данные совпадают, то ничего делать не надо */ + if (new_data && mdbx_is_samedata(&present_data, new_data)) + goto bailout; + } else if ((flags & MDB_NODUPDATA) && mdbx_is_samedata(&present_data, new_data)) { + /* если данные совпадают и установлен MDB_NODUPDATA */ + rc = MDB_KEYEXIST; + goto bailout; + } + } else { + flags |= MDB_CURRENT; + } + + if (page->mp_flags & P_DIRTY) { + if (unlikely(old_data->iov_len < present_data.iov_len)) { + old_data->iov_base = NULL; + old_data->iov_len = present_data.iov_len; + rc = -1; + goto bailout; + } + memcpy(old_data->iov_base, present_data.iov_base, present_data.iov_len); + old_data->iov_len = present_data.iov_len; + } else { + *old_data = present_data; + } + } + } + + if (likely(new_data)) + rc = mdbx_cursor_put(&mc, key, new_data, flags); + else + rc = mdbx_cursor_del(&mc, 0); + +bailout: + txn->mt_cursors[dbi] = mc.mc_next; + return rc; +} diff --git a/mdbx.h b/mdbx.h index 8a5c238e..033db286 100644 --- a/mdbx.h +++ b/mdbx.h @@ -223,6 +223,9 @@ size_t mdbx_canary_get(MDB_txn *txn, mdbx_canary* canary); * 0 otherwise or less that zero in error case. */ int mdbx_cursor_eof(MDB_cursor *mc); +int mdbx_replace(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *new_data, MDB_val *old_data, unsigned flags); + /** @} */ #ifdef __cplusplus From ef375647c70ef5ee35f9c23a7fb15a6820ec7f80 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Thu, 22 Dec 2016 20:54:06 +0300 Subject: [PATCH 12/13] mdbx: fix mdbx_replace(). Always return `old_data`, even no changes. --- mdbx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mdbx.c b/mdbx.c index 781450a5..3946a50c 100644 --- a/mdbx.c +++ b/mdbx.c @@ -369,7 +369,7 @@ int mdbx_cursor_eof(MDB_cursor *mc) return (mc->mc_flags & C_INITIALIZED) ? 0 : 1; } -static int mdbx_is_samedata(MDB_val* a, MDB_val* b) { +static int mdbx_is_samedata(const MDB_val* a, const MDB_val* b) { return a->iov_len == b->iov_len && memcmp(a->iov_base, b->iov_base, a->iov_len) == 0; } @@ -462,14 +462,21 @@ int mdbx_replace(MDB_txn *txn, MDB_dbi dbi, goto bailout; } /* если данные совпадают, то ничего делать не надо */ - if (new_data && mdbx_is_samedata(&present_data, new_data)) + if (new_data && mdbx_is_samedata(&present_data, new_data)) { + *old_data = *new_data; goto bailout; + } } else if ((flags & MDB_NODUPDATA) && mdbx_is_samedata(&present_data, new_data)) { /* если данные совпадают и установлен MDB_NODUPDATA */ rc = MDB_KEYEXIST; goto bailout; } } else { + /* если данные совпадают, то ничего делать не надо */ + if (new_data && mdbx_is_samedata(&present_data, new_data)) { + *old_data = *new_data; + goto bailout; + } flags |= MDB_CURRENT; } From dbc57d3eaf9025b8a5b3d14dd2ecbd0589d1c1b4 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Fri, 23 Dec 2016 15:35:42 +0300 Subject: [PATCH 13/13] mdbx: fix cursor_count() for libfpta. --- mdb.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/mdb.c b/mdb.c index 4976179c..323edf6d 100644 --- a/mdb.c +++ b/mdb.c @@ -7713,35 +7713,51 @@ mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) int mdb_cursor_count(MDB_cursor *mc, size_t *countp) { - MDB_node *leaf; - if (unlikely(mc == NULL || countp == NULL)) return EINVAL; if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) return MDB_VERSION_MISMATCH; - if (unlikely(mc->mc_xcursor == NULL)) - return MDB_INCOMPATIBLE; - if (unlikely(mc->mc_txn->mt_flags & MDB_TXN_BLOCKED)) return MDB_BAD_TXN; if (unlikely(!(mc->mc_flags & C_INITIALIZED))) return EINVAL; +#if MDBX_MODE_ENABLED + MDB_page *mp = mc->mc_pg[mc->mc_top]; + int nkeys = NUMKEYS(mp); + if (!nkeys || mc->mc_ki[mc->mc_top] >= nkeys) { + *countp = 0; + return MDB_NOTFOUND; + } else if (mc->mc_xcursor == NULL || IS_LEAF2(mp)) { + *countp = 1; + } else { + MDB_node *leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) + *countp = 1; + else if (unlikely(!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))) + return EINVAL; + else + *countp = mc->mc_xcursor->mx_db.md_entries; + } +#else + if (unlikely(mc->mc_xcursor == NULL)) + return MDB_INCOMPATIBLE; + if (unlikely(!mc->mc_snum || (mc->mc_flags & C_EOF))) return MDB_NOTFOUND; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { *countp = 1; } else { if (unlikely(!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))) return EINVAL; - *countp = mc->mc_xcursor->mx_db.md_entries; } +#endif /* MDBX_MODE_ENABLED */ return MDB_SUCCESS; }