mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:14:12 +08:00
mdbx: adds functions for distance/move/range estimation (initial).
Change-Id: If59eccf7311123ab6384c4b93f9b1fed5a0a10d1
This commit is contained in:
parent
7d383350e8
commit
8ddfd1f34a
47
mdbx.h
47
mdbx.h
@ -1695,6 +1695,53 @@ LIBMDBX_API int mdbx_cursor_on_first(MDBX_cursor *mc);
|
||||
/* Returns: MDBX_RESULT_TRUE, MDBX_RESULT_FALSE or Error code. */
|
||||
LIBMDBX_API int mdbx_cursor_on_last(MDBX_cursor *mc);
|
||||
|
||||
/* Estimates the distance between cursors as the number of elements.
|
||||
* Both cursors must be initialized for the same DBI.
|
||||
*
|
||||
* [in] first The first cursor for estimation.
|
||||
* [in] last The second cursor for estimation.
|
||||
* [out] distance_items A pointer to store estimated distance value,
|
||||
* i.e. *distance_items = distance(last - first).
|
||||
*
|
||||
* Returns A non-zero error value on failure and 0 on success. */
|
||||
LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first,
|
||||
const MDBX_cursor *last,
|
||||
ptrdiff_t *distance_items);
|
||||
|
||||
/* Estimates the move distance, i.e. between the current cursor position and
|
||||
* next position after the specified move-operation with given key and data.
|
||||
* Current cursor position and state are preserved.
|
||||
*
|
||||
* [in] cursor Cursor for estimation.
|
||||
* [in,out] key The key for a retrieved item.
|
||||
* [in,out] data The data of a retrieved item.
|
||||
* [in] move_op A cursor operation MDBX_cursor_op.
|
||||
* [out] distance_items A pointer to store estimated move distance
|
||||
* as the number of elements.
|
||||
*
|
||||
* Returns A non-zero error value on failure and 0 on success. */
|
||||
LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key,
|
||||
MDBX_val *data, MDBX_cursor_op move_op,
|
||||
ptrdiff_t *distance_items);
|
||||
|
||||
/* Estimates the size of a range in the number of elements.
|
||||
*
|
||||
* [in] txn A transaction handle returned by mdbx_txn_begin().
|
||||
* [in] dbi A database handle returned by mdbx_dbi_open().
|
||||
* [in] begin_key The key of range beginning or NULL for explicit FIRST.
|
||||
* [in] begin_data Optional additional data to seeking among sorted
|
||||
* duplicates. Only for MDBX_DUPSORT, NULL otherwise.
|
||||
* [in] end_key The key of range ending or NULL for explicit LAST.
|
||||
* [in] end_data Optional additional data to seeking among sorted
|
||||
* duplicates. Only for MDBX_DUPSORT, NULL otherwise.
|
||||
* [out] size_items A pointer to store range estimation result.
|
||||
*
|
||||
* Returns A non-zero error value on failure and 0 on success. */
|
||||
LIBMDBX_API int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi,
|
||||
MDBX_val *begin_key, MDBX_val *begin_data,
|
||||
MDBX_val *end_key, MDBX_val *end_data,
|
||||
ptrdiff_t *size_items);
|
||||
|
||||
LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
|
||||
MDBX_val *new_data, MDBX_val *old_data,
|
||||
unsigned flags);
|
||||
|
355
src/mdbx.c
355
src/mdbx.c
@ -13163,11 +13163,366 @@ int mdbx_cursor_eof(MDBX_cursor *mc) {
|
||||
return MDBX_RESULT_FALSE;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/* Outcome of comparing the page stacks of two cursors, see cursor_diff(). */
struct diff_result {
  /* Signed position difference (last - first) taken at `level`. */
  ptrdiff_t diff;
  /* Index within the cursor page stack at which the comparison stopped. */
  int level;
  /* Number of keys on the page at stack level 0. */
  int root_nkeys;
};
|
||||
|
||||
static int cursor_diff(const MDBX_cursor *const __restrict first,
|
||||
const MDBX_cursor *const __restrict last,
|
||||
struct diff_result *const __restrict dr) {
|
||||
dr->root_nkeys = 0;
|
||||
dr->level = 0;
|
||||
dr->diff = 0;
|
||||
|
||||
if (unlikely(first->mc_signature != MDBX_MC_SIGNATURE ||
|
||||
last->mc_signature != MDBX_MC_SIGNATURE))
|
||||
return MDBX_EBADSIGN;
|
||||
|
||||
if (unlikely(first->mc_dbi != last->mc_dbi))
|
||||
return MDBX_EINVAL;
|
||||
|
||||
if (unlikely(!(first->mc_flags & last->mc_flags & C_INITIALIZED)))
|
||||
return MDBX_ENODATA;
|
||||
|
||||
while (likely(dr->level < first->mc_snum && dr->level < last->mc_snum)) {
|
||||
if (unlikely(first->mc_pg[dr->level] != last->mc_pg[dr->level]))
|
||||
return MDBX_PROBLEM;
|
||||
|
||||
int nkeys = NUMKEYS(first->mc_pg[dr->level]);
|
||||
assert(nkeys > 0);
|
||||
if (dr->level == 0)
|
||||
dr->root_nkeys = nkeys;
|
||||
|
||||
int max_ki = nkeys - 1;
|
||||
int last_ki = last->mc_ki[dr->level];
|
||||
int first_ki = first->mc_ki[dr->level];
|
||||
dr->diff = ((last_ki < max_ki) ? last_ki : max_ki) -
|
||||
((first_ki < max_ki) ? first_ki : max_ki);
|
||||
if (dr->diff == 0) {
|
||||
dr->level += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
while (unlikely(dr->diff == 1) && likely(dr->level + 1 < first->mc_snum &&
|
||||
dr->level + 1 < last->mc_snum)) {
|
||||
dr->level += 1;
|
||||
/* DB'PAGEs: 0------------------>MAX
|
||||
*
|
||||
* CURSORs: first < last
|
||||
* STACK[i ]: |
|
||||
* STACK[+1]: ...f++N|0++l...
|
||||
*/
|
||||
nkeys = NUMKEYS(first->mc_pg[dr->level]);
|
||||
dr->diff = (nkeys - first->mc_ki[dr->level]) + last->mc_ki[dr->level];
|
||||
assert(dr->diff > 0);
|
||||
}
|
||||
|
||||
while (unlikely(dr->diff == -1) && likely(dr->level + 1 < first->mc_snum &&
|
||||
dr->level + 1 < last->mc_snum)) {
|
||||
dr->level += 1;
|
||||
/* DB'PAGEs: 0------------------>MAX
|
||||
*
|
||||
* CURSORs: last < first
|
||||
* STACK[i ]: |
|
||||
* STACK[+1]: ...l--N|0--f...
|
||||
*/
|
||||
nkeys = NUMKEYS(last->mc_pg[dr->level]);
|
||||
dr->diff = -(nkeys - last->mc_ki[dr->level]) - first->mc_ki[dr->level];
|
||||
assert(dr->diff < 0);
|
||||
}
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
dr->diff = mdbx_cmp2int(last->mc_flags & C_EOF, first->mc_flags & C_EOF);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
static ptrdiff_t estimate(const MDBX_db *db,
|
||||
struct diff_result *const __restrict dr) {
|
||||
/* root: branch-page => scale = leaf-factor * branch-factor(N-1)
|
||||
* level-1: branch-page(s) => scale = leaf-factor * branch-factor^2
|
||||
* level-2: branch-page(s) => scale = leaf-factor * branch-factor
|
||||
* level-N: branch-page(s) => scale = leaf-factor
|
||||
* last-level: leaf-page(s) => scale = 1
|
||||
*/
|
||||
ptrdiff_t btree_power = db->md_depth - 2 - dr->level;
|
||||
if (btree_power < 0)
|
||||
return dr->diff;
|
||||
|
||||
ptrdiff_t estimated =
|
||||
(ptrdiff_t)db->md_entries * dr->diff / (ptrdiff_t)db->md_leaf_pages;
|
||||
if (btree_power == 0)
|
||||
return estimated;
|
||||
|
||||
if (db->md_depth < 4) {
|
||||
assert(dr->level == 0 && btree_power == 1);
|
||||
return (ptrdiff_t)db->md_entries * dr->diff / (ptrdiff_t)dr->root_nkeys;
|
||||
}
|
||||
|
||||
/* average_branch_fillfactor = total(branch_entries) / branch_pages
|
||||
* total(branch_entries) = leaf_pages + branch_pages - 1 (root page) */
|
||||
const size_t log2_fixedpoint = 3;
|
||||
const size_t half = UINT64_C(1) << (log2_fixedpoint - 1);
|
||||
const size_t factor =
|
||||
((db->md_leaf_pages + db->md_branch_pages - 1) << log2_fixedpoint) /
|
||||
db->md_branch_pages;
|
||||
while (1) {
|
||||
switch ((size_t)btree_power) {
|
||||
default: {
|
||||
const size_t square = (factor * factor + half) >> log2_fixedpoint;
|
||||
const size_t quad = (square * square + half) >> log2_fixedpoint;
|
||||
do {
|
||||
estimated = estimated * quad + half;
|
||||
estimated >>= log2_fixedpoint;
|
||||
btree_power -= 4;
|
||||
} while (btree_power >= 4);
|
||||
continue;
|
||||
}
|
||||
case 3:
|
||||
estimated = estimated * factor + half;
|
||||
estimated >>= log2_fixedpoint;
|
||||
__fallthrough /* fall through */;
|
||||
case 2:
|
||||
estimated = estimated * factor + half;
|
||||
estimated >>= log2_fixedpoint;
|
||||
__fallthrough /* fall through */;
|
||||
case 1:
|
||||
estimated = estimated * factor + half;
|
||||
estimated >>= log2_fixedpoint;
|
||||
__fallthrough /* fall through */;
|
||||
case 0:
|
||||
if (unlikely(estimated > (ptrdiff_t)db->md_entries))
|
||||
return (ptrdiff_t)db->md_entries;
|
||||
if (unlikely(estimated < -(ptrdiff_t)db->md_entries))
|
||||
return -(ptrdiff_t)db->md_entries;
|
||||
return estimated;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last,
|
||||
ptrdiff_t *distance_items) {
|
||||
if (unlikely(first == NULL || last == NULL || distance_items == NULL))
|
||||
return MDBX_EINVAL;
|
||||
|
||||
*distance_items = 0;
|
||||
struct diff_result dr;
|
||||
int rc = cursor_diff(first, last, &dr);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
if (unlikely(dr.diff == 0) &&
|
||||
F_ISSET(first->mc_db->md_flags & first->mc_db->md_flags,
|
||||
MDBX_DUPSORT | C_INITIALIZED)) {
|
||||
first = &first->mc_xcursor->mx_cursor;
|
||||
last = &last->mc_xcursor->mx_cursor;
|
||||
rc = cursor_diff(first, last, &dr);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (likely(dr.diff != 0))
|
||||
*distance_items = estimate(first->mc_db, &dr);
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data,
|
||||
MDBX_cursor_op move_op, ptrdiff_t *distance_items) {
|
||||
if (unlikely(cursor == NULL || distance_items == NULL ||
|
||||
move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE))
|
||||
return MDBX_EINVAL;
|
||||
|
||||
if (unlikely(cursor->mc_signature != MDBX_MC_SIGNATURE))
|
||||
return MDBX_EBADSIGN;
|
||||
|
||||
if (!(cursor->mc_flags & C_INITIALIZED))
|
||||
return MDBX_ENODATA;
|
||||
|
||||
MDBX_cursor_couple next;
|
||||
mdbx_cursor_copy(cursor, &next.outer);
|
||||
next.outer.mc_xcursor = NULL;
|
||||
if (cursor->mc_db->md_flags & MDBX_DUPSORT) {
|
||||
next.outer.mc_xcursor = &next.inner;
|
||||
int rc = mdbx_xcursor_init0(&next.outer);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
MDBX_xcursor *mx = &container_of(cursor, MDBX_cursor_couple, outer)->inner;
|
||||
mdbx_cursor_copy(&mx->mx_cursor, &next.inner.mx_cursor);
|
||||
}
|
||||
|
||||
MDBX_val stub = {0, 0};
|
||||
if (data == NULL) {
|
||||
const unsigned mask =
|
||||
1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY;
|
||||
if (unlikely(mask & (1 << move_op)))
|
||||
return MDBX_EINVAL;
|
||||
data = &stub;
|
||||
}
|
||||
|
||||
if (key == NULL) {
|
||||
const unsigned mask = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE |
|
||||
1 << MDBX_SET_KEY | 1 << MDBX_SET |
|
||||
1 << MDBX_SET_RANGE;
|
||||
if (unlikely(mask & (1 << move_op)))
|
||||
return MDBX_EINVAL;
|
||||
key = &stub;
|
||||
}
|
||||
|
||||
int rc = mdbx_cursor_get(&next.outer, key, data, move_op);
|
||||
if (unlikely(rc != MDBX_SUCCESS &&
|
||||
(rc != MDBX_NOTFOUND || !(next.outer.mc_flags & C_INITIALIZED))))
|
||||
return rc;
|
||||
|
||||
return mdbx_estimate_distance(cursor, &next.outer, distance_items);
|
||||
}
|
||||
|
||||
static int mdbx_is_samedata(const MDBX_val *a, const MDBX_val *b) {
|
||||
return a->iov_len == b->iov_len &&
|
||||
memcmp(a->iov_base, b->iov_base, a->iov_len) == 0;
|
||||
}
|
||||
|
||||
/* Estimates the number of items within a range of a database.
 *
 * [in]  txn         The transaction; must be owned by the calling thread and
 *                   must not be blocked.
 * [in]  dbi         The database handle.
 * [in]  begin_key   The key of the range beginning, or NULL for FIRST.
 * [in]  begin_data  Optional data to seek among duplicates of begin_key;
 *                   must be NULL when begin_key is NULL.
 * [in]  end_key     The key of the range ending, or NULL for LAST.
 * [in]  end_data    Optional data to seek among duplicates of end_key;
 *                   must be NULL when end_key is NULL.
 * [out] size_items  Receives the estimation, a value within
 *                   [0, md_entries] of the database.
 *
 * Special cases:
 *  - an empty database yields 0;
 *  - FIRST..LAST yields md_entries exactly;
 *  - equal begin/end keys without data yield 0 if the key is absent, 1 if it
 *    is present, or the number of duplicates stored for that key;
 *  - an inverted range (begin after end) is counted "around", i.e. the
 *    negative distance is increased by md_entries.
 *
 * Returns MDBX_EINVAL, MDBX_EBADSIGN, MDBX_THREAD_MISMATCH or MDBX_BAD_TXN
 * on invalid arguments/state, an error from cursor positioning or from
 * mdbx_estimate_distance(), or MDBX_SUCCESS. */
int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key,
                        MDBX_val *begin_data, MDBX_val *end_key,
                        MDBX_val *end_data, ptrdiff_t *size_items) {

  if (unlikely(!txn || !size_items))
    return MDBX_EINVAL;

  if (unlikely(!begin_key && begin_data))
    return MDBX_EINVAL;

  if (unlikely(!end_key && end_data))
    return MDBX_EINVAL;

  if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
    return MDBX_EBADSIGN;

  if (unlikely(txn->mt_owner != mdbx_thread_self()))
    return MDBX_THREAD_MISMATCH;

  if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
    return MDBX_EINVAL;

  if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED))
    return MDBX_BAD_TXN;

  MDBX_cursor_couple begin;
  /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */
  int rc = mdbx_cursor_init(&begin.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  if (unlikely(begin.outer.mc_db->md_entries == 0)) {
    *size_items = 0;
    return MDBX_SUCCESS;
  }

  /* Copies of the caller's values taken before positioning the cursors; they
   * are compared below to detect an inverted range with an empty scope. */
  MDBX_val origin_begin_key, origin_begin_data;
  if (!begin_key) {
    if (unlikely(!end_key)) {
      /* LY: FIRST..LAST case */
      *size_items = (ptrdiff_t)begin.outer.mc_db->md_entries;
      return MDBX_SUCCESS;
    }
    MDBX_val stub = {0, 0};
    rc = mdbx_cursor_first(&begin.outer, &stub, &stub);
  } else {
    if (end_key && !begin_data && !end_data &&
        (begin_key == end_key || mdbx_is_samedata(begin_key, end_key))) {
      /* LY: single key case */
      int exact = 0;
      rc = mdbx_cursor_set(&begin.outer, begin_key, NULL, MDBX_SET, &exact);
      if (unlikely(rc != MDBX_SUCCESS)) {
        *size_items = 0;
        return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc;
      }
      *size_items = 1;
      if (begin.outer.mc_xcursor != NULL) {
        MDBX_node *leaf = NODEPTR(begin.outer.mc_pg[begin.outer.mc_top],
                                  begin.outer.mc_ki[begin.outer.mc_top]);
        if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
          /* LY: return the number of duplicates for given key */
          mdbx_tassert(txn,
                       begin.outer.mc_xcursor == &begin.inner &&
                           (begin.inner.mx_cursor.mc_flags & C_INITIALIZED));
          /* The result is a ptrdiff_t, hence saturate at PTRDIFF_MAX so the
           * stored value can never turn negative. */
          *size_items =
              (begin.inner.mx_db.md_entries <= (uint64_t)PTRDIFF_MAX)
                  ? (ptrdiff_t)begin.inner.mx_db.md_entries
                  : PTRDIFF_MAX;
        }
      }
      return MDBX_SUCCESS;
    }

    MDBX_cursor_op begin_op = MDBX_SET_RANGE;
    if (begin_data) {
      begin_op = MDBX_GET_BOTH_RANGE;
      origin_begin_data = *begin_data;
    }
    origin_begin_key = *begin_key;
    rc = mdbx_cursor_set(&begin.outer, begin_key, begin_data, begin_op, NULL);
  }

  if (unlikely(rc != MDBX_SUCCESS)) {
    /* MDBX_NOTFOUND is acceptable while the cursor remains initialized. */
    if (rc != MDBX_NOTFOUND || !(begin.outer.mc_flags & C_INITIALIZED))
      return rc;
  }

  MDBX_cursor_couple end;
  rc = mdbx_cursor_init(&end.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;
  MDBX_val origin_end_key, origin_end_data;
  if (!end_key) {
    MDBX_val stub = {0, 0};
    rc = mdbx_cursor_last(&end.outer, &stub, &stub);
  } else {
    MDBX_cursor_op end_op = MDBX_SET_RANGE;
    if (end_data) {
      end_op = MDBX_GET_BOTH_RANGE;
      origin_end_data = *end_data;
    }
    origin_end_key = *end_key;
    rc = mdbx_cursor_set(&end.outer, end_key, end_data, end_op, NULL);
  }
  if (unlikely(rc != MDBX_SUCCESS)) {
    if (rc != MDBX_NOTFOUND || !(end.outer.mc_flags & C_INITIALIZED))
      return rc;
  }

  rc = mdbx_estimate_distance(&begin.outer, &end.outer, size_items);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;
  assert(*size_items >= -(ptrdiff_t)begin.outer.mc_db->md_entries &&
         *size_items <= (ptrdiff_t)begin.outer.mc_db->md_entries);
  if (*size_items < 0) {
    /* LY: inverted range case */
    *size_items += (ptrdiff_t)begin.outer.mc_db->md_entries;
  } else if (*size_items == 0 && begin_key && end_key) {
    /* Both cursors ended up at the same position: compare the values as they
     * were passed in to tell an empty range from an inverted one. */
    int cmp = begin.outer.mc_dbx->md_cmp(&origin_begin_key, &origin_end_key);
    if (cmp == 0 && (begin.inner.mx_cursor.mc_flags & C_INITIALIZED) &&
        begin_data && end_data)
      cmp = begin.outer.mc_dbx->md_dcmp(&origin_begin_data, &origin_end_data);
    if (cmp > 0) {
      /* LY: inverted range case with empty scope */
      *size_items = (ptrdiff_t)begin.outer.mc_db->md_entries;
    }
  }

  assert(*size_items >= 0 &&
         *size_items <= (ptrdiff_t)begin.outer.mc_db->md_entries);
  return MDBX_SUCCESS;
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/* Позволяет обновить или удалить существующую запись с получением
|
||||
* в old_data предыдущего значения данных. При этом если new_data равен
|
||||
* нулю, то выполняется удаление, иначе обновление/вставка.
|
||||
|
Loading…
x
Reference in New Issue
Block a user