mdbx: add mdbx_cursor_get_batch().

Resolve https://github.com/erthink/libmdbx/issues/236
This commit is contained in:
Leonid Yuriev 2021-12-11 02:56:19 +03:00
parent 32e495021f
commit 6f2c1e52ad
5 changed files with 230 additions and 3 deletions

37
mdbx.h
View File

@ -4333,6 +4333,43 @@ LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest);
LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key,
MDBX_val *data, MDBX_cursor_op op);
/** \brief Retrieve multiple non-dupsort key/value pairs by cursor.
* \ingroup c_crud
*
* This function retrieves multiple key/data pairs from a database that does
* not use the \ref MDBX_DUPSORT option. For `MDBX_DUPSORT` databases please
* use \ref MDBX_GET_MULTIPLE and \ref MDBX_NEXT_MULTIPLE.
*
* The number of key and value items retrieved is stored in the `size_t`
* variable to which `count` refers. The addresses and lengths of the keys
* and values are returned in the array to which `pairs` refers; every key
* item is immediately followed by the item holding its value.
* \see mdbx_cursor_get()
*
* \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open().
* \param [out] count The number of key and value items returned; on success
* it is always even because whole key-value
* pairs are returned.
* \param [in,out] pairs A pointer to the array of key value pairs.
* \param [in] limit The capacity of the `pairs` buffer counted in items
* (array elements), not in pairs. Must be at least 4.
* \param [in] op A cursor operation \ref MDBX_cursor_op (only
* \ref MDBX_FIRST, \ref MDBX_NEXT, \ref MDBX_GET_CURRENT
* are supported).
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_THREAD_MISMATCH Given transaction is not owned
* by current thread.
* \retval MDBX_NOTFOUND No more key-value pairs are available.
* \retval MDBX_ENODATA The cursor is already at the end of data.
* \retval MDBX_RESULT_TRUE The specified limit is less than the available
* key-value pairs on the current page/position
* that the cursor points to. The cursor is left
* on the first pair that did not fit.
* \retval MDBX_INCOMPATIBLE The cursor belongs to a `MDBX_DUPSORT`
* database.
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_cursor_get_batch(MDBX_cursor *cursor, size_t *count,
MDBX_val *pairs, size_t limit,
MDBX_cursor_op op);
/** \brief Store by cursor.
* \ingroup c_crud
*

View File

@ -3722,7 +3722,8 @@ static void mdbx_node_del(MDBX_cursor *mc, size_t ksize);
static void mdbx_node_shrink(MDBX_page *mp, unsigned indx);
static int __must_check_result mdbx_node_move(MDBX_cursor *csrc,
MDBX_cursor *cdst, bool fromleft);
static int __must_check_result mdbx_node_read(MDBX_cursor *mc, MDBX_node *leaf,
static int __must_check_result mdbx_node_read(MDBX_cursor *mc,
const MDBX_node *leaf,
MDBX_val *data,
const txnid_t front);
static int __must_check_result mdbx_rebalance(MDBX_cursor *mc);
@ -13572,8 +13573,9 @@ __hot static int mdbx_page_search(MDBX_cursor *mc, const MDBX_val *key,
* [out] data Updated to point to the node's data.
*
* Returns 0 on success, non-zero on failure. */
static __always_inline int mdbx_node_read(MDBX_cursor *mc, MDBX_node *node,
MDBX_val *data, const txnid_t front) {
static __always_inline int mdbx_node_read(MDBX_cursor *mc,
const MDBX_node *node, MDBX_val *data,
const txnid_t front) {
data->iov_len = node_ds(node);
data->iov_base = node_data(node);
if (unlikely(F_ISSET(node_flags(node), F_BIGDATA))) {
@ -14551,6 +14553,122 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
return rc;
}
/* Positions the cursor on slot 0 of the first leaf page for batch reading.
 *
 * The page search is skipped only when the cursor is already initialized and
 * its stack top is level 0. On success the cursor is flagged as initialized
 * and its end-of-data flag is cleared.
 *
 * Returns MDBX_SUCCESS, or the error reported by mdbx_page_search(). */
static int cursor_first_batch(MDBX_cursor *mc) {
  const int already_placed = (mc->mc_flags & C_INITIALIZED) && mc->mc_top == 0;
  if (!already_placed) {
    const int rc = mdbx_page_search(mc, NULL, MDBX_PS_FIRST);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
  }

  mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));

  /* Start from the very first slot of the page on top of the stack. */
  mc->mc_ki[mc->mc_top] = 0;
  mc->mc_flags |= C_INITIALIZED;
  mc->mc_flags &= ~C_EOF;
  return MDBX_SUCCESS;
}
/* Steps the cursor one slot forward for batch reading.
 *
 * An uninitialized cursor is simply positioned via cursor_first_batch().
 * When the incremented index runs past the keys of the current page, the
 * cursor is handed to mdbx_cursor_sibling(..., SIBLING_RIGHT); if that fails
 * the end-of-data flag is raised and the error is passed through.
 *
 * Returns MDBX_SUCCESS, MDBX_NOTFOUND when the cursor is flagged as being at
 * the end and no further slot exists on the page, or another error code. */
static int cursor_next_batch(MDBX_cursor *mc) {
  if (unlikely((mc->mc_flags & C_INITIALIZED) == 0))
    return cursor_first_batch(mc);

  MDBX_page *page = mc->mc_pg[mc->mc_top];
  if (unlikely((mc->mc_flags & C_EOF) != 0)) {
    const unsigned after = (unsigned)mc->mc_ki[mc->mc_top] + 1;
    if (after >= page_numkeys(page))
      return MDBX_NOTFOUND;
    /* The flag is known to be set here, so the toggle clears it. */
    mc->mc_flags ^= C_EOF;
  }

  const int next = mc->mc_ki[mc->mc_top] + 1;
  mc->mc_ki[mc->mc_top] = (indx_t)next;
  const int total = page_numkeys(page);
  if (unlikely(next < total))
    return MDBX_SUCCESS;

  mdbx_debug("%s", "=====> move to next sibling page");
  /* Park the index on the last slot before asking for the right sibling. */
  mc->mc_ki[mc->mc_top] = (indx_t)(total - 1);
  const int rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT);
  if (unlikely(rc != MDBX_SUCCESS)) {
    mc->mc_flags |= C_EOF;
    return rc;
  }

  page = mc->mc_pg[mc->mc_top];
  mdbx_debug("next page is %" PRIaPGNO ", key index %u", page->mp_pgno,
             mc->mc_ki[mc->mc_top]);
  return MDBX_SUCCESS;
}
/* Retrieves a batch of key/value pairs from a non-dupsort table.
 *
 * The cursor is first positioned according to `op` (MDBX_FIRST, MDBX_NEXT or
 * MDBX_GET_CURRENT); then the pairs from the cursor's index up to the end of
 * the current leaf page are stored into `pairs` as key, value, key, value...
 *
 * [in]  mc     The cursor to read through.
 * [out] count  Receives the number of MDBX_val items stored into `pairs`.
 * [out] pairs  Buffer for the items; the key goes to an even index and its
 *              value to the following odd index.
 * [in]  limit  Capacity of `pairs` in items (not in pairs), at least 4.
 * [in]  op     The positioning operation.
 *
 * Returns MDBX_SUCCESS when the rest of the page was delivered,
 * MDBX_RESULT_TRUE when `limit` was exhausted first (the cursor then stays
 * on the first pair that was not delivered), MDBX_NOTFOUND / MDBX_ENODATA at
 * the end of data, MDBX_INCOMPATIBLE for a dupsort table, MDBX_EINVAL or
 * MDBX_EBADSIGN for bad arguments, or another error code. */
int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs,
                          size_t limit, MDBX_cursor_op op) {
  /* `pairs` is written unconditionally below, so it must not be NULL. */
  if (unlikely(mc == NULL || count == NULL || pairs == NULL || limit < 4))
    return MDBX_EINVAL;

  if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
    return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
                                                     : MDBX_EBADSIGN;

  int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  if (unlikely(mc->mc_db->md_flags & MDBX_DUPSORT))
    return MDBX_INCOMPATIBLE /* must be a non-dupsort subDB */;

  switch (op) {
  case MDBX_FIRST:
    rc = cursor_first_batch(mc);
    break;
  case MDBX_NEXT:
    rc = cursor_next_batch(mc);
    break;
  case MDBX_GET_CURRENT:
    rc = likely(mc->mc_flags & C_INITIALIZED) ? MDBX_SUCCESS : MDBX_ENODATA;
    break;
  default:
    mdbx_debug("unhandled/unimplemented cursor operation %u", op);
    /* Use the library's own code like every other path of this function,
     * rather than the raw errno constant. */
    rc = MDBX_EINVAL;
    break;
  }

  if (unlikely(rc != MDBX_SUCCESS)) {
    *count = 0;
    return rc;
  }

  const MDBX_page *const page = mc->mc_pg[mc->mc_top];
  const unsigned nkeys = page_numkeys(page);
  unsigned i = mc->mc_ki[mc->mc_top], n = 0;
  if (unlikely(i >= nkeys)) {
    /* The index sits past the last slot: a previous batch consumed the page
     * to its end and the caller asked for the current position again. */
    mdbx_cassert(mc, op == MDBX_GET_CURRENT);
    mdbx_cassert(mc, mdbx_cursor_on_last(mc) == MDBX_RESULT_TRUE);
    *count = 0;
    if (mc->mc_flags & C_EOF) {
      mdbx_cassert(mc, mdbx_cursor_on_last(mc) == MDBX_RESULT_TRUE);
      return MDBX_ENODATA;
    }
    if (mdbx_cursor_on_last(mc) != MDBX_RESULT_TRUE)
      return MDBX_EINVAL /* again MDBX_GET_CURRENT after MDBX_GET_CURRENT */;
    mc->mc_flags |= C_EOF;
    return MDBX_NOTFOUND;
  }

  const txnid_t pp_txnid = pp_txnid4chk(page, mc->mc_txn);
  do {
    /* Stop before a pair that would not fit entirely into the buffer. */
    if (unlikely(n + 2 > limit)) {
      rc = MDBX_RESULT_TRUE;
      break;
    }
    const MDBX_node *leaf = page_node(page, i);
    get_key(leaf, &pairs[n]);
    rc = mdbx_node_read(mc, leaf, &pairs[n + 1], pp_txnid);
    if (unlikely(rc != MDBX_SUCCESS))
      break;
    n += 2;
  } while (++i < nkeys);

  /* Remember where reading stopped: either one past the last slot or the
   * first pair that was not delivered. */
  mc->mc_ki[mc->mc_top] = (indx_t)i;
  *count = n;
  return rc;
}
static int mdbx_touch_dbi(MDBX_cursor *mc) {
mdbx_cassert(mc, (*mc->mc_dbistate & DBI_DIRTY) == 0);
*mc->mc_dbistate |= DBI_DIRTY;

View File

@ -285,6 +285,11 @@ bool testcase_hill::run() {
log_notice("hill: reached %d tree depth & %s sub-tree depth(s)",
stat.ms_depth, str.c_str());
}
if ((config.params.table_flags & MDBX_DUPSORT) == 0) {
if (!check_batch_get())
failure("batch-get verification failed");
}
}
while (serial_count > 1) {

View File

@ -1192,3 +1192,69 @@ bool testcase::speculum_verify() {
mdbx_cursor_close(cursor);
return rc;
}
bool testcase::check_batch_get() {
char dump_key[128], dump_value[128];
char dump_key_batch[128], dump_value_batch[128];
MDBX_cursor *cursor;
int err = mdbx_cursor_open(txn_guard.get(), dbi, &cursor);
if (err != MDBX_SUCCESS)
failure_perror("mdbx_cursor_open()", err);
MDBX_cursor *batch_cursor;
err = mdbx_cursor_open(txn_guard.get(), dbi, &batch_cursor);
if (err != MDBX_SUCCESS)
failure_perror("mdbx_cursor_open()", err);
MDBX_val pairs[42];
size_t count = 0xDeadBeef;
err = mdbx_cursor_get_batch(batch_cursor, &count, pairs, ARRAY_LENGTH(pairs),
MDBX_FIRST);
bool rc = true;
size_t i, n = 0;
while (err == MDBX_SUCCESS) {
for (i = 0; i < count; i += 2) {
mdbx::slice key, value;
int err2 = mdbx_cursor_get(cursor, &key, &value, MDBX_NEXT);
if (err2 != MDBX_SUCCESS)
failure_perror("mdbx_cursor_open()", err2);
if (key != pairs[i] || value != pairs[i + 1]) {
log_error(
"batch-get pair mismatch %zu/%zu: sequential{%s, %s} != "
"batch{%s, %s}",
n + i / 2, i, mdbx_dump_val(&key, dump_key, sizeof(dump_key)),
mdbx_dump_val(&value, dump_value, sizeof(dump_value)),
mdbx_dump_val(&pairs[i], dump_key_batch, sizeof(dump_key_batch)),
mdbx_dump_val(&pairs[i + 1], dump_value_batch,
sizeof(dump_value_batch)));
rc = false;
}
}
n += i / 2;
err = mdbx_cursor_get_batch(batch_cursor, &count, pairs,
ARRAY_LENGTH(pairs), MDBX_NEXT);
}
if (err != MDBX_NOTFOUND)
failure_perror("mdbx_cursor_get_batch()", err);
err = mdbx_cursor_eof(batch_cursor);
if (err != MDBX_RESULT_TRUE) {
log_error("batch-get %s cursor not-eof %d", "batch", err);
rc = false;
}
err = mdbx_cursor_on_last(batch_cursor);
if (err != MDBX_RESULT_TRUE) {
log_error("batch-get %s cursor not-on-last %d", "batch", err);
rc = false;
}
err = mdbx_cursor_on_last(cursor);
if (err != MDBX_RESULT_TRUE) {
log_error("batch-get %s cursor not-on-last %d", "checked", err);
rc = false;
}
mdbx_cursor_close(cursor);
mdbx_cursor_close(batch_cursor);
return rc;
}

View File

@ -227,6 +227,7 @@ protected:
const MDBX_val &v) const;
bool speculum_verify();
bool check_batch_get();
int insert(const keygen::buffer &akey, const keygen::buffer &adata,
MDBX_put_flags_t flags);
int replace(const keygen::buffer &akey, const keygen::buffer &new_value,