mdbx: add mdbx_cursor_create() and mdbx_cursor_bind().

Change-Id: I223de3cca7865d58f17a59ab27ec6be730e04a90
This commit is contained in:
Leonid Yuriev 2020-09-15 02:05:25 +03:00
parent 234d65dc9d
commit 10b170c6cd
3 changed files with 153 additions and 80 deletions

88
mdbx.h
View File

@ -612,7 +612,8 @@ typedef uint32_t MDBX_dbi;
/** \brief Opaque structure for navigating through a database /** \brief Opaque structure for navigating through a database
* \ingroup c_cursors * \ingroup c_cursors
* \see mdbx_cursor_open() \see mdbx_cursor_close() */ * \see mdbx_cursor_create() \see mdbx_cursor_bind() \see mdbx_cursor_close()
*/
#ifndef __cplusplus #ifndef __cplusplus
typedef struct MDBX_cursor MDBX_cursor; typedef struct MDBX_cursor MDBX_cursor;
#else #else
@ -3452,17 +3453,76 @@ LIBMDBX_API int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi,
LIBMDBX_API int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, LIBMDBX_API int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key,
const MDBX_val *data); const MDBX_val *data);
/** \brief Create a cursor handle. /** \brief Create a cursor handle but not bind it to transaction nor DBI handle.
* \ingroup c_cursors * \ingroup c_cursors
* *
* A cursor is associated with a specific transaction and database. A cursor * An capable of operation cursor is associated with a specific transaction and
* cannot be used when its database handle is closed. Nor when its transaction * database. A cursor cannot be used when its database handle is closed. Nor
* has ended, except with \ref mdbx_cursor_renew(). Also it can be discarded * when its transaction has ended, except with \ref mdbx_cursor_bind() and
* with \ref mdbx_cursor_close(). * \ref mdbx_cursor_renew().
* Also it can be discarded with \ref mdbx_cursor_close().
* *
* A cursor must be closed explicitly always, before or after its transaction * A cursor must be closed explicitly always, before or after its transaction
* ends. It can be reused with \ref mdbx_cursor_renew() before finally closing * ends. It can be reused with \ref mdbx_cursor_bind()
* it. * or \ref mdbx_cursor_renew() before finally closing it.
*
* \note In contrast to LMDB, MDBX requires that any opened cursor can be
* reused and must be freed explicitly, regardless of whether it was opened in
* a read-only or write transaction. The REASON for this is to eliminate
* ambiguity, which helps to avoid errors such as use-after-free and
* double-free, i.e. memory corruption and segfaults.
*
* This function takes no parameters: the transaction handle (returned by
* \ref mdbx_txn_begin()) and the database handle (returned by
* \ref mdbx_dbi_open()) are supplied later via \ref mdbx_cursor_bind(), and
* the new \ref MDBX_cursor handle is the return value rather than an output
* argument.
*
* \returns The created cursor handle, or NULL if out of memory. */
LIBMDBX_API MDBX_cursor *mdbx_cursor_create(void);
/** \brief Bind cursor to specified transaction and DBI handle.
* \ingroup c_cursors
*
* Using `mdbx_cursor_bind()` is equivalent to calling
* \ref mdbx_cursor_renew(), but with an arbitrary DBI handle specified.
*
* A cursor capable of operation is associated with a specific transaction and
* database. The cursor may be associated with a new transaction, and may
* reference a new or the same database handle as it was created with.
* This may be done whether the previous transaction is live or dead.
*
* \note In contrast to LMDB, MDBX requires that any opened cursor can be
* reused and must be freed explicitly, regardless of whether it was opened in
* a read-only or write transaction. The REASON for this is to eliminate
* ambiguity, which helps to avoid errors such as use-after-free and
* double-free, i.e. memory corruption and segfaults.
*
* \param [in] txn A transaction handle returned by \ref mdbx_txn_begin().
* \param [in] cursor A cursor handle returned by \ref mdbx_cursor_create().
* \param [in] dbi A database handle returned by \ref mdbx_dbi_open().
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_THREAD_MISMATCH Given transaction is not owned
* by current thread.
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *cursor,
MDBX_dbi dbi);
/** \brief Create a cursor handle for the specified transaction and DBI handle.
* \ingroup c_cursors
*
* Using `mdbx_cursor_open()` is equivalent to calling
* \ref mdbx_cursor_create() and then \ref mdbx_cursor_bind().
*
* A cursor capable of operation is associated with a specific transaction and
* database. A cursor cannot be used when its database handle is closed, nor
* when its transaction has ended, except with \ref mdbx_cursor_bind() and
* \ref mdbx_cursor_renew().
* It can also be discarded with \ref mdbx_cursor_close().
*
* A cursor must be closed explicitly always, before or after its transaction
* ends. It can be reused with \ref mdbx_cursor_bind()
* or \ref mdbx_cursor_renew() before finally closing it.
* *
* \note In contrast to LMDB, the MDBX required that any opened cursors can be * \note In contrast to LMDB, the MDBX required that any opened cursors can be
* reused and must be freed explicitly, regardless ones was opened in a * reused and must be freed explicitly, regardless ones was opened in a
@ -3501,10 +3561,14 @@ LIBMDBX_API void mdbx_cursor_close(MDBX_cursor *cursor);
/** \brief Renew a cursor handle. /** \brief Renew a cursor handle.
* \ingroup c_cursors * \ingroup c_cursors
* *
* A cursor is associated with a specific transaction and database. The cursor * An capable of operation cursor is associated with a specific transaction and
* may be associated with a new transaction, and referencing the same database * database. The cursor may be associated with a new transaction,
* handle as it was created with. This may be done whether the previous * and referencing a new or the same database handle as it was created with.
* transaction is live or dead. * This may be done whether the previous transaction is live or dead.
*
* Using `mdbx_cursor_renew()` is equivalent to calling
* \ref mdbx_cursor_bind() with the DBI handle that the cursor
* was previously used with.
* *
* \note In contrast to LMDB, the MDBX allow any cursor to be re-used by using * \note In contrast to LMDB, the MDBX allow any cursor to be re-used by using
* \ref mdbx_cursor_renew(), to avoid unnecessary malloc/free overhead until it * \ref mdbx_cursor_renew(), to avoid unnecessary malloc/free overhead until it

View File

@ -5842,9 +5842,9 @@ static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) {
for (mc = cursors[i]; mc; mc = next) { for (mc = cursors[i]; mc; mc = next) {
unsigned stage = mc->mc_signature; unsigned stage = mc->mc_signature;
mdbx_ensure(txn->mt_env, mdbx_ensure(txn->mt_env,
stage == MDBX_MC_SIGNATURE || stage == MDBX_MC_WAIT4EOT); stage == MDBX_MC_LIVE || stage == MDBX_MC_WAIT4EOT);
next = mc->mc_next; next = mc->mc_next;
mdbx_tassert(txn, !next || next->mc_signature == MDBX_MC_SIGNATURE || mdbx_tassert(txn, !next || next->mc_signature == MDBX_MC_LIVE ||
next->mc_signature == MDBX_MC_WAIT4EOT); next->mc_signature == MDBX_MC_WAIT4EOT);
if ((bk = mc->mc_backup) != NULL) { if ((bk = mc->mc_backup) != NULL) {
if (merge) { if (merge) {
@ -11967,7 +11967,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (unlikely(mc == NULL)) if (unlikely(mc == NULL))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
@ -12177,7 +12177,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
if (unlikely(mc == NULL || key == NULL || data == NULL)) if (unlikely(mc == NULL || key == NULL || data == NULL))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED);
@ -12974,7 +12974,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
if (unlikely(!mc)) if (unlikely(!mc))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED); int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED);
@ -13577,7 +13577,7 @@ static __inline int mdbx_couple_init(MDBX_cursor_couple *couple,
const MDBX_dbi dbi, MDBX_txn *const txn, const MDBX_dbi dbi, MDBX_txn *const txn,
MDBX_db *const db, MDBX_dbx *const dbx, MDBX_db *const db, MDBX_dbx *const dbx,
uint8_t *const dbstate) { uint8_t *const dbstate) {
couple->outer.mc_signature = MDBX_MC_SIGNATURE; couple->outer.mc_signature = MDBX_MC_LIVE;
couple->outer.mc_next = NULL; couple->outer.mc_next = NULL;
couple->outer.mc_backup = NULL; couple->outer.mc_backup = NULL;
couple->outer.mc_dbi = dbi; couple->outer.mc_dbi = dbi;
@ -13602,7 +13602,7 @@ static __inline int mdbx_couple_init(MDBX_cursor_couple *couple,
} }
if (couple->outer.mc_db->md_flags & MDBX_DUPSORT) { if (couple->outer.mc_db->md_flags & MDBX_DUPSORT) {
couple->inner.mx_cursor.mc_signature = MDBX_MC_SIGNATURE; couple->inner.mx_cursor.mc_signature = MDBX_MC_LIVE;
couple->outer.mc_xcursor = &couple->inner; couple->outer.mc_xcursor = &couple->inner;
rc = mdbx_xcursor_init0(&couple->outer); rc = mdbx_xcursor_init0(&couple->outer);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
@ -13621,10 +13621,40 @@ static int mdbx_cursor_init(MDBX_cursor *mc, MDBX_txn *txn, MDBX_dbi dbi) {
&txn->mt_dbistate[dbi]); &txn->mt_dbistate[dbi]);
} }
int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { MDBX_cursor *mdbx_cursor_create(void) {
if (unlikely(!ret)) MDBX_cursor_couple *couple = mdbx_calloc(1, sizeof(MDBX_cursor_couple));
if (unlikely(!couple))
return nullptr;
couple->outer.mc_signature = MDBX_MC_READY4CLOSE;
couple->outer.mc_dbi = UINT_MAX;
return &couple->outer;
}
int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) {
if (unlikely(!mc))
return MDBX_EINVAL; return MDBX_EINVAL;
*ret = NULL;
if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE)) {
if (unlikely(mc->mc_signature != MDBX_MC_LIVE || mc->mc_backup))
return MDBX_EINVAL;
if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE))
return MDBX_PROBLEM;
if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) {
MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
while (*prev && *prev != mc)
prev = &(*prev)->mc_next;
if (*prev == mc)
*prev = mc->mc_next;
}
mc->mc_signature = MDBX_MC_READY4CLOSE;
mc->mc_flags = 0;
mc->mc_dbi = UINT_MAX;
}
assert(!mc->mc_backup && !mc->mc_flags);
if (unlikely(mc->mc_backup || mc->mc_flags))
return MDBX_PROBLEM;
int rc = check_txn(txn, MDBX_TXN_BLOCKED); int rc = check_txn(txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
@ -13636,24 +13666,32 @@ int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) {
if (unlikely(dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY))) if (unlikely(dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY)))
return MDBX_EACCESS; return MDBX_EACCESS;
const size_t size = (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT)
? sizeof(MDBX_cursor_couple)
: sizeof(MDBX_cursor);
MDBX_cursor *mc;
if (likely((mc = mdbx_malloc(size)) != NULL)) {
rc = mdbx_cursor_init(mc, txn, dbi); rc = mdbx_cursor_init(mc, txn, dbi);
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS))
mdbx_free(mc);
return rc; return rc;
}
if (txn->mt_cursors) { if (txn->mt_cursors) {
mc->mc_next = txn->mt_cursors[dbi]; mc->mc_next = txn->mt_cursors[dbi];
txn->mt_cursors[dbi] = mc; txn->mt_cursors[dbi] = mc;
mc->mc_flags |= C_UNTRACK; mc->mc_flags |= C_UNTRACK;
} }
} else {
return MDBX_SUCCESS;
}
int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) {
if (unlikely(!ret))
return MDBX_EINVAL;
*ret = NULL;
MDBX_cursor *const mc = mdbx_cursor_create();
if (unlikely(!mc))
return MDBX_ENOMEM; return MDBX_ENOMEM;
int rc = mdbx_cursor_bind(txn, mc, dbi);
if (unlikely(rc != MDBX_SUCCESS)) {
mdbx_cursor_close(mc);
return rc;
} }
*ret = mc; *ret = mc;
@ -13661,36 +13699,7 @@ int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) {
} }
int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) { int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) {
if (unlikely(!mc)) return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL;
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE &&
mc->mc_signature != MDBX_MC_READY4CLOSE))
return MDBX_EINVAL;
int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
if (unlikely(!mdbx_txn_dbi_exists(txn, mc->mc_dbi, DBI_VALID)))
return MDBX_BAD_DBI;
if (unlikely(mc->mc_backup))
return MDBX_EINVAL;
if (unlikely((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)) {
MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
while (*prev && *prev != mc)
prev = &(*prev)->mc_next;
if (*prev == mc)
*prev = mc->mc_next;
mc->mc_signature = MDBX_MC_READY4CLOSE;
}
if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED))
return MDBX_BAD_TXN;
return mdbx_cursor_init(mc, txn, mc->mc_dbi);
} }
/* Return the count of duplicate data items for the current key */ /* Return the count of duplicate data items for the current key */
@ -13698,7 +13707,7 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) {
if (unlikely(mc == NULL)) if (unlikely(mc == NULL))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED); int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
@ -13735,7 +13744,7 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) {
void mdbx_cursor_close(MDBX_cursor *mc) { void mdbx_cursor_close(MDBX_cursor *mc) {
if (mc) { if (mc) {
mdbx_ensure(NULL, mc->mc_signature == MDBX_MC_SIGNATURE || mdbx_ensure(NULL, mc->mc_signature == MDBX_MC_LIVE ||
mc->mc_signature == MDBX_MC_READY4CLOSE); mc->mc_signature == MDBX_MC_READY4CLOSE);
if (!mc->mc_backup) { if (!mc->mc_backup) {
/* Remove from txn, if tracked. /* Remove from txn, if tracked.
@ -13752,14 +13761,14 @@ void mdbx_cursor_close(MDBX_cursor *mc) {
mdbx_free(mc); mdbx_free(mc);
} else { } else {
/* cursor closed before nested txn ends */ /* cursor closed before nested txn ends */
mdbx_cassert(mc, mc->mc_signature == MDBX_MC_SIGNATURE); mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE);
mc->mc_signature = MDBX_MC_WAIT4EOT; mc->mc_signature = MDBX_MC_WAIT4EOT;
} }
} }
} }
MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) {
if (unlikely(!mc || mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE))
return NULL; return NULL;
MDBX_txn *txn = mc->mc_txn; MDBX_txn *txn = mc->mc_txn;
if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE))
@ -13770,7 +13779,7 @@ MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) {
} }
MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) { MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) {
if (unlikely(!mc || mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE))
return UINT_MAX; return UINT_MAX;
return mc->mc_dbi; return mc->mc_dbi;
} }
@ -18045,7 +18054,7 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) {
if (unlikely(mc == NULL)) if (unlikely(mc == NULL))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
if (!(mc->mc_flags & C_INITIALIZED)) if (!(mc->mc_flags & C_INITIALIZED))
@ -18063,7 +18072,7 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) {
if (unlikely(mc == NULL)) if (unlikely(mc == NULL))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
if (!(mc->mc_flags & C_INITIALIZED)) if (!(mc->mc_flags & C_INITIALIZED))
@ -18082,7 +18091,7 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) {
if (unlikely(mc == NULL)) if (unlikely(mc == NULL))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
if ((mc->mc_flags & C_INITIALIZED) == 0) if ((mc->mc_flags & C_INITIALIZED) == 0)
@ -18114,8 +18123,8 @@ __hot static int cursor_diff(const MDBX_cursor *const __restrict x,
r->level = 0; r->level = 0;
r->root_nkeys = 0; r->root_nkeys = 0;
if (unlikely(y->mc_signature != MDBX_MC_SIGNATURE || if (unlikely(y->mc_signature != MDBX_MC_LIVE ||
x->mc_signature != MDBX_MC_SIGNATURE)) x->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
int rc = check_txn(x->mc_txn, MDBX_TXN_BLOCKED); int rc = check_txn(x->mc_txn, MDBX_TXN_BLOCKED);
@ -18281,7 +18290,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data,
move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE)) move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(cursor->mc_signature != MDBX_MC_SIGNATURE)) if (unlikely(cursor->mc_signature != MDBX_MC_LIVE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
int rc = check_txn(cursor->mc_txn, MDBX_TXN_BLOCKED); int rc = check_txn(cursor->mc_txn, MDBX_TXN_BLOCKED);

View File

@ -852,7 +852,7 @@ struct MDBX_xcursor;
* Exception: An xcursor's pointer to a P_SUBP page can be stale. * Exception: An xcursor's pointer to a P_SUBP page can be stale.
* (A node with F_DUPDATA but no F_SUBDATA contains a subpage). */ * (A node with F_DUPDATA but no F_SUBDATA contains a subpage). */
struct MDBX_cursor { struct MDBX_cursor {
#define MDBX_MC_SIGNATURE UINT32_C(0xFE05D5B1) #define MDBX_MC_LIVE UINT32_C(0xFE05D5B1)
#define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047) #define MDBX_MC_READY4CLOSE UINT32_C(0x2817A047)
#define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7) #define MDBX_MC_WAIT4EOT UINT32_C(0x90E297A7)
uint32_t mc_signature; uint32_t mc_signature;