From 20447c877dab185e5e3a6d0f856d074e78ebc906 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Thu, 26 Sep 2019 20:00:22 +0300 Subject: [PATCH] mdbx: refine mdbx.h and API description, etc (5 of 5). Change-Id: I851736cccb4d0271d6f351e1358cc5c4e1257bb4 --- .gitignore | 2 +- mdbx.h | 327 ++++++++++++++++++++++++++++++++-------- src/elements/core.c | 51 ++++--- src/tools/mdbx_stat.c | 4 +- test/long_stochastic.sh | 2 +- test/test.cc | 6 +- test/test.h | 4 +- 7 files changed, 304 insertions(+), 92 deletions(-) diff --git a/.gitignore b/.gitignore index b9ac472f..0496b621 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,7 @@ cmake-build-* @* core -example +mdbx_example libmdbx.creator.user mdbx_chk mdbx_copy diff --git a/mdbx.h b/mdbx.h index 202e3018..d7ca7c0a 100644 --- a/mdbx.h +++ b/mdbx.h @@ -740,7 +740,7 @@ typedef struct iovec MDBX_val; * MDBX only store a 32 bit value for node sizes. */ #define MDBX_MAXDATASIZE INT32_MAX -/*** DEBUG & LOGGING ********************************************************** +/**** DEBUG & LOGGING ********************************************************** * Logging and runtime debug flags. * * NOTE: Most of debug feature enabled only if libmdbx builded with MDBX_DEBUG. @@ -1237,7 +1237,7 @@ LIBMDBX_API char *mdbx_dump_val(const MDBX_val *key, char *const buf, /* Compacting: Omit free space from copy, and renumber all pages sequentially */ #define MDBX_CP_COMPACT 1u -/*** CURSOR OPERATIONS ********************************************************* +/**** CURSOR OPERATIONS ******************************************************** * * This is the set of all operations for retrieving data * using a cursor. */ @@ -1273,7 +1273,7 @@ typedef enum MDBX_cursor_op { * return up to a page of duplicate data items. 
*/ } MDBX_cursor_op; -/*** ERRORS & RETURN CODES ***************************************************** +/**** ERRORS & RETURN CODES **************************************************** * BerkeleyDB uses -30800 to -30999, we'll go under them */ /* Successful result */ @@ -2085,7 +2085,8 @@ LIBMDBX_API void *mdbx_env_get_userctx(MDBX_env *env); * - MDBX_READERS_FULL = a read-only transaction was requested and the reader * lock table is full. See mdbx_env_set_maxreaders(). * - MDBX_ENOMEM = out of memory. - * - MDBX_BUSY = a write transaction is already started. */ + * - MDBX_BUSY = the write transaction is already started by the + * current thread. */ LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags, MDBX_txn **txn); @@ -2152,7 +2153,7 @@ typedef struct MDBX_txn_info { * Returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_txn_info(MDBX_txn *txn, MDBX_txn_info *info, int scan_rlt); -/* Returns the transaction's MDBX_env +/* Returns the transaction's MDBX_env. * * [in] txn A transaction handle returned by mdbx_txn_begin() */ LIBMDBX_API MDBX_env *mdbx_txn_env(MDBX_txn *txn); @@ -2244,13 +2245,42 @@ LIBMDBX_API int mdbx_txn_reset(MDBX_txn *txn); * - MDBX_EINVAL = an invalid parameter was specified. */ LIBMDBX_API int mdbx_txn_renew(MDBX_txn *txn); -/* FIXME: Complete description */ +/* The fours integers markers (aka "canary") associated with the environment. + * + * The `x`, `y` and `z` values could be set by mdbx_canary_put(), while the 'v' + * will be always set to the transaction number. Updated values becomes visible + * outside the current transaction only after it was committed. Current values + * could be retrieved by mdbx_canary_get(). */ typedef struct mdbx_canary { uint64_t x, y, z, v; } mdbx_canary; -/* FIXME: Complete description */ +/* Set integers markers (aka "canary") associated with the environment. 
+ * + * [in] txn A transaction handle returned by mdbx_txn_begin() + * [in] canary An optional pointer to mdbx_canary structure for `x`, `y` + * and `z` values from. + * - If canary is NOT NULL then the `x`, `y` and `z` values will be + * updated from given canary argument, but the 'v' be always set + * to the current transaction number if at least one `x`, `y` or + * `z` values have changed (i.e. if `x`, `y` and `z` have the same + * values as currently present then nothing will be changed or + * updated). + * - if canary is NULL then the `v` value will be explicitly updated + * to the current transaction number without changes `x`, `y` nor + * `z`. + * + * Returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_canary_put(MDBX_txn *txn, const mdbx_canary *canary); + +/* Returns the four integer markers (aka "canary") associated with the + * environment. + * + * [in] txn A transaction handle returned by mdbx_txn_begin() + * [out] canary The address of an mdbx_canary structure where the information + * will be copied. + * + * Returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_canary_get(MDBX_txn *txn, mdbx_canary *canary); /* A callback function used to compare two keys in a database */ @@ -2389,7 +2419,7 @@ LIBMDBX_API int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags); * the DB name is gone). * * Closing a database handle is not necessary, but lets mdbx_dbi_open() reuse - * the handle value. Usually it's better to set a bigger mdbx_env_set_maxdbs(), + * the handle value. Usually it's better to set a bigger mdbx_env_set_maxdbs(), * unless that value would be large.
* * [in] env An environment handle returned by mdbx_env_create() @@ -2437,23 +2467,56 @@ LIBMDBX_API int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del); LIBMDBX_API int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data); -/* FIXME: Complete description +/* Get items from a database and optionaly number of data items for a given key. * - * Same as mdbx_get() with a few differences: - * 1) If values_count is not NULL, then returns the count - * of multi-values/duplicates for a given key. - * 2) Updates the key for pointing to the actual key's data inside DB. */ + * Briefly this function does the same as mdbx_get() with a few differences: + * 1. If values_count is NOT NULL, then returns the count + * of multi-values/duplicates for a given key. + * 2. Updates BOTH the key and the data for pointing to the actual key-value + * pair inside the database. + * + * [in] txn A transaction handle returned by mdbx_txn_begin() + * [in] dbi A database handle returned by mdbx_dbi_open() + * [in,out] key The key to search for in the database + * [in,out] data The data corresponding to the key + * [out] values_count The optional address to return number of values + * associated with given key, i.e. + * = 0 - in case MDBX_NOTFOUND error; + * = 1 - exactly for databases WITHOUT MDBX_DUPSORT; + * >= 1 for databases WITH MDBX_DUPSORT. + * + * Returns A non-zero error value on failure and 0 on success, some + * possible errors are: + * - MDBX_NOTFOUND = the key was not in the database. + * - MDBX_EINVAL = an invalid parameter was specified. */ LIBMDBX_API int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count); -/* FIXME: Complete description + +/* Get nearest items from a database. * - * Same as mdbx_get() with a few differences: - * 1) Internally uses MDBX_GET_BOTH or MDBX_SET_KEY, instead of MDBX_SET, - * i.e. return nearest value, but not only exactly matching with key. 
- * 2) On success return MDBX_SUCCESS if key found exactly, - * and MDBX_RESULT_TRUE otherwise. */ -LIBMDBX_API int mdbx_get2(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *data); + * Briefly this function does the same as mdbx_get() with a few differences: + * 1. Return nearest (i.e. equal or greater by comparison function) key-value + * pair, but not only exactly matching with the key. + * 2. On success return MDBX_SUCCESS if key found exactly, + * and MDBX_RESULT_TRUE otherwise. Moreover, for databases with MDBX_DUPSORT + * flag the data argument also will be used to match over + * multi-value/duplicates, and MDBX_SUCCESS will be returned only when BOTH + * the key and the data match exactly. + * 3. Updates BOTH the key and the data for pointing to the actual key-value + * pair inside the database. + * + * [in] txn A transaction handle returned by mdbx_txn_begin() + * [in] dbi A database handle returned by mdbx_dbi_open() + * [in,out] key The key to search for in the database + * [in,out] data The data corresponding to the key + * + * Returns A non-zero error value on failure and MDBX_SUCCESS (0) or + * MDBX_RESULT_TRUE on success (as described above). + * Some possible errors are: + * - MDBX_NOTFOUND = the key was not in the database. + * - MDBX_EINVAL = an invalid parameter was specified. */ +LIBMDBX_API int mdbx_get_nearest(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *data); /* Store items into a database. * @@ -2462,13 +2525,13 @@ LIBMDBX_API int mdbx_get2(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, * if duplicates are disallowed, or adding a duplicate data item if * duplicates are allowed (MDBX_DUPSORT). * - * [in] txn A transaction handle returned by mdbx_txn_begin() - * [in] dbi A database handle returned by mdbx_dbi_open() - * [in] key The key to store in the database - * [in,out] data The data to store - * [in] flags Special options for this operation.
This parameter must be - * set to 0 or by bitwise OR'ing together one or more of the - * values described here. + * [in] txn A transaction handle returned by mdbx_txn_begin() + * [in] dbi A database handle returned by mdbx_dbi_open() + * [in] key The key to store in the database + * [in,out] data The data to store + * [in] flags Special options for this operation. This parameter must be + * set to 0 or by bitwise OR'ing together one or more of the + * values described here. * * - MDBX_NODUPDATA * Enter the new key/data pair only if it does not already appear @@ -2516,6 +2579,46 @@ LIBMDBX_API int mdbx_get2(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, LIBMDBX_API int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, unsigned flags); +/* Replace items in a database. + * + * This function allows to update or delete an existing value at the same time + * as the previous value is retrieved. If the argument new_data is NULL, + * the removal is performed, otherwise the update/insert is performed. + * + * The current value may be in an already changed (aka dirty) page. In this + * case, the page will be overwritten during the update, and the old value will + * be lost. Therefore, an additional buffer must be passed via old_data argument + * initially to copy the old value. If the buffer passed in is too small, the + * function will return MDBX_RESULT_TRUE (-1) by setting iov_len field pointed + * by old_data argument to the appropriate value, without performing any + * changes. + * + * For databases with non-unique keys (i.e. with MDBX_DUPSORT flag), another use + * case is also possible, when the old_data argument selects a specific item from + * multi-value/duplicates with the same key for deletion or update. To select + * this scenario in flags should simultaneously specify MDBX_CURRENT and + * MDBX_NOOVERWRITE. This combination is chosen because it makes no sense, and + * thus allows you to identify the request of such a scenario.
+ * + * [in] txn A transaction handle returned by mdbx_txn_begin(). + * [in] dbi A database handle returned by mdbx_dbi_open(). + * [in] key The key to store in the database. + * [in,out] new_data The data to store, if NULL then deletion will be + * performed. + * [in,out] old_data The buffer for retrieve previous value as described + * above. + * [in] flags Special options for this operation. This parameter must + * be set to 0 or by bitwise OR'ing together one or more of + * the values described in mdbx_put() description above, + * and additionally (MDBX_CURRENT | MDBX_NOOVERWRITE) + * combination for selection particular item from + * multi-value/duplicates. + * + * Returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *new_data, MDBX_val *old_data, + unsigned flags); + /* Delete items from a database. * * This function removes key/data pairs from the database. @@ -2611,8 +2714,8 @@ LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(MDBX_cursor *cursor); * * Returns A non-zero error value on failure and 0 on success, some * possible errors are: - * - MDBX_NOTFOUND - no matching key found. - * - MDBX_EINVAL - an invalid parameter was specified. */ + * - MDBX_NOTFOUND = no matching key found. + * - MDBX_EINVAL = an invalid parameter was specified. */ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); @@ -2685,7 +2788,7 @@ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, LIBMDBX_API int mdbx_cursor_put(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, unsigned flags); -/* Delete current key/data pair +/* Delete current key/data pair. * * This function deletes the key/data pair to which the cursor refers. 
This does * not invalidate the cursor, so operations such as MDBX_NEXT can still be used @@ -2708,8 +2811,8 @@ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, unsigned flags); /* Return count of duplicates for current key. * - * This call is only valid on databases that support sorted duplicate data - * items MDBX_DUPSORT. + * This call is valid for all databases, but reasonable only for that support + * sorted duplicate data items MDBX_DUPSORT. * * [in] cursor A cursor handle returned by mdbx_cursor_open() * [out] countp Address where the count will be stored @@ -2720,28 +2823,75 @@ LIBMDBX_API int mdbx_cursor_del(MDBX_cursor *cursor, unsigned flags); * was specified. */ LIBMDBX_API int mdbx_cursor_count(MDBX_cursor *cursor, size_t *countp); -/* FIXME: Complete description +/* Determines whether the cursor is pointed to a key-value pair or not, + * i.e. was not positioned or points to the end of data. + * + * [in] cursor A cursor handle returned by mdbx_cursor_open() + * * Returns: - * - MDBX_RESULT_TRUE - * when no more data available or cursor not positioned; - * - MDBX_RESULT_FALSE - * when data available; + * - MDBX_RESULT_TRUE = no more data available or cursor not positioned; + * - MDBX_RESULT_FALSE = data available; * - Otherwise the error code. */ LIBMDBX_API int mdbx_cursor_eof(MDBX_cursor *mc); -/* Returns: MDBX_RESULT_TRUE, MDBX_RESULT_FALSE or Error code. */ +/* Determines whether the cursor is pointed to the first key-value pair or not. + * + * [in] cursor A cursor handle returned by mdbx_cursor_open() + * + * Returns: + * - MDBX_RESULT_TRUE = cursor positioned to the first key-value pair. + * - MDBX_RESULT_FALSE = cursor NOT positioned to the first key-value pair. + * - Otherwise the error code. */ LIBMDBX_API int mdbx_cursor_on_first(MDBX_cursor *mc); -/* Returns: MDBX_RESULT_TRUE, MDBX_RESULT_FALSE or Error code. */ +/* Determines whether the cursor is pointed to the last key-value pair or not. 
+ * + * [in] cursor A cursor handle returned by mdbx_cursor_open() + * + * Returns: + * - MDBX_RESULT_TRUE = cursor positioned to the last key-value pair. + * - MDBX_RESULT_FALSE = cursor NOT positioned to the last key-value pair. + * - Otherwise the error code. */ LIBMDBX_API int mdbx_cursor_on_last(MDBX_cursor *mc); -/* Estimates the distance between cursors as a number of elements. - * Both cursors must be initialized for the same DBI. +/* Estimates the distance between cursors as a number of elements. The results + * of such estimation can be used to build and/or optimize query execution + * plans. * - * [in] cursor_a The first cursor for estimation. - * [in] cursor_b The second cursor for estimation. + * This function performs a rough estimate based only on b-tree pages that are + * common for the both cursor's stacks. + * + * NOTE: The result varies greatly depending on the filling of specific pages + * and the overall balance of the b-tree: + * + * 1. The number of items is estimated by analyzing the height and fullness of + * the b-tree. The accuracy of the result directly depends on the balance of the + * b-tree, which in turn is determined by the history of previous insert/delete + * operations and the nature of the data (i.e. variability of keys length and so + * on). Therefore, the accuracy of the estimation can vary greatly in a + * particular situation. + * + * 2. To understand the potential spread of results, you should consider a + * possible situations basing on the general criteria for splitting and merging + * b-tree pages: + * - the page is split into two when there is no space for added data; + * - two pages merge if the result fits in half a page; + * - thus, the b-tree can consist of an arbitrary combination of pages filled + * both completely and only 1/4. Therefore, in the worst case, the result + * can diverge 4 times for each level of the b-tree excepting the first and + * the last. + * + * 3. 
In practice, the probability of extreme cases of the above situation is + * close to zero and in most cases the error does not exceed a few percent. On + * the other hand, it's just a chance you shouldn't overestimate. + * + * Both cursors must be initialized for the same database and the same + * transaction. + * + * [in] first The first cursor for estimation. + * [in] last The second cursor for estimation. * [out] distance_items A pointer to store estimated distance value, - * i.e. *distance_items = distance(a - b). + * i.e. *distance_items = distance(first, last). * * Returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first, @@ -2750,7 +2900,11 @@ LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first, /* Estimates the move distance, i.e. between the current cursor position and * next position after the specified move-operation with given key and data. - * Current cursor position and state are preserved. + * The results of such estimation can be used to build and/or optimize query + * execution plans. Current cursor position and state are preserved. + * + * Please see notes on accuracy of the result in mdbx_estimate_distance() + * description above. * * [in] cursor Cursor for estimation. * [in,out] key The key for a retrieved item. @@ -2764,7 +2918,12 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op move_op, ptrdiff_t *distance_items); -/* Estimates the size of a range as a number of elements. +/* Estimates the size of a range as a number of elements. The results + * of such estimation can be used to build and/or optimize query execution + * plans. + * + * Please see notes on accuracy of the result in mdbx_estimate_distance() + * description above. * * [in] txn A transaction handle returned by mdbx_txn_begin(). * [in] dbi A database handle returned by mdbx_dbi_open(). 
@@ -2783,15 +2942,57 @@ LIBMDBX_API int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *end_key, MDBX_val *end_data, ptrdiff_t *size_items); -/* FIXME: Complete description */ -LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, - MDBX_val *new_data, MDBX_val *old_data, - unsigned flags); - -/* FIXME: Complete description */ +/* Determines whether the given address is on a dirty database page of the + * transaction or not. Ultimately, this allows to avoid copy data from non-dirty + * pages. + * + * "Dirty" pages are those that have already been changed during a write + * transaction. Accordingly, any further changes may result in such pages being + * overwritten. Therefore, all functions libmdbx performing changes inside the + * database as arguments should NOT get pointers to data in those pages. In + * turn, "not dirty" pages before modification will be copied. + * + * In other words, data from dirty pages must either be copied before being + * passed as arguments for further processing or rejected at the argument + * validation stage. Thus, mdbx_is_dirty() allows you to get rid of unnecessary + * copying, and perform a more complete check of the arguments. + * + * NOTE: The address passed must point to the beginning of the data. This is the + * only way to ensure that the actual page header is physically located in the + * same memory page, including for multi-pages with long data. + * + * NOTE: In rare cases the function may return a false positive answer + * (MDBX_RESULT_TRUE when data is NOT on a dirty page), but never a false + * negative if the arguments are correct. + * + * [in] txn A transaction handle returned by mdbx_txn_begin(). + * [in] ptr The address of data to check. + * + * Returns: + * - MDBX_RESULT_TRUE = given address is on the dirty page. + * - MDBX_RESULT_FALSE = given address is NOT on the dirty page. + * - Otherwise the error code.
*/ LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr); -/* FIXME: Complete description */ +/* Sequence generation for a database. + * + * The function allows to create a linear sequence of unique positive integers + * for each database. The function can be called for a read transaction to + * retrieve the current sequence value, and the increment must be zero. + * Sequence changes become visible outside the current write transaction after + * it is committed, and discarded on abort. + * + * [in] txn A transaction handle returned by mdbx_txn_begin(). + * [in] dbi A database handle returned by mdbx_dbi_open(). + * [out] result The optional address where the value of sequence before the + * change will be stored. + * [in] increment Value to increase the sequence, + * must be 0 for read-only transactions. + * + * Returns A non-zero error value on failure and 0 on success, some + * possible errors are: + * - MDBX_RESULT_TRUE = Increasing the sequence has resulted in an overflow + * and therefore cannot be executed. */ LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment); @@ -2846,7 +3047,7 @@ LIBMDBX_API int mdbx_dcmp(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, typedef int(MDBX_reader_list_func)(void *ctx, int num, int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, uint64_t lag, size_t bytes_used, - size_t bytes_retired); + size_t bytes_retained); /* Enumarete the entries in the reader lock table. * @@ -2894,8 +3095,8 @@ LIBMDBX_API int mdbx_txn_straggler(MDBX_txn *txn, int *percent); * 0 should wait or retry, * 1 drop reader txn-lock (reading-txn was aborted), * >1 drop reader registration (reader process was killed). */ -typedef int(MDBX_oom_func)(MDBX_env *env, int pid, mdbx_tid_t tid, uint64_t txn, - unsigned gap, int retry); +typedef int(MDBX_oom_func)(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid, + uint64_t txn, unsigned gap, int retry); /* Set the OOM callback. 
* @@ -2919,7 +3120,11 @@ LIBMDBX_API int mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oom_func); * Returns A MDBX_oom_func function or NULL if disabled. */ LIBMDBX_API MDBX_oom_func *mdbx_env_get_oomfunc(MDBX_env *env); -/* FIXME: Complete description */ +/**** B-tree Traversal ********************************************************* + * This is internal API for mdbx_chk tool. You should avoid to use it, except + * some extremal special cases. */ + +/* Page types for traverse the b-tree. */ typedef enum { MDBX_page_void, MDBX_page_meta, @@ -2935,17 +3140,19 @@ typedef enum { #define MDBX_PGWALK_GC ((const char *)((ptrdiff_t)-1)) #define MDBX_PGWALK_META ((const char *)((ptrdiff_t)-2)) +/* Callback function for traverse the b-tree. */ typedef int MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, const int deep, const char *const dbi, const size_t page_size, const MDBX_page_type_t type, const size_t nentries, const size_t payload_bytes, const size_t header_bytes, const size_t unused_bytes); -/* FIXME: Complete description */ + +/* B-tree traversal function. 
*/ LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, void *ctx); -/*** Attribute support functions for Nexenta **********************************/ +/**** Attribute support functions for Nexenta *********************************/ #ifdef MDBX_NEXENTA_ATTRS typedef uint_fast64_t mdbx_attr_t; diff --git a/src/elements/core.c b/src/elements/core.c index a8abfcfd..56f0d80c 100644 --- a/src/elements/core.c +++ b/src/elements/core.c @@ -37,8 +37,8 @@ #include "internals.h" -/*----------------------------------------------------------------------------*/ -/* Internal inlines */ +/*------------------------------------------------------------------------------ + * Internal inlines */ static __inline bool mdbx_is_power2(size_t x) { return (x & (x - 1)) == 0; } @@ -159,8 +159,8 @@ static __inline pgno_t pgno_align2os_pgno(const MDBX_env *env, pgno_t pgno) { *tp = tracked->mc_next; \ } while (0) -/*----------------------------------------------------------------------------*/ -/* LY: temporary workaround for Elbrus's memcmp() bug. */ +/*------------------------------------------------------------------------------ + * LY: temporary workaround for Elbrus's memcmp() bug. */ #if defined(__e2k__) && !__GLIBC_PREREQ(2, 24) int __hot mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2, @@ -2057,7 +2057,8 @@ static __inline MDBX_db *mdbx_outer_db(MDBX_cursor *mc) { return couple->outer.mc_db; } -static int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) { +static __must_check_result int mdbx_page_retire(MDBX_cursor *mc, + MDBX_page *mp) { MDBX_txn *txn = mc->mc_txn; mdbx_cassert(mc, (mc->mc_flags & C_SUB) == 0); @@ -2117,7 +2118,7 @@ static __must_check_result int mdbx_refund_loose(MDBX_txn *txn) { * * If the page wasn't dirtied in this txn, just add it * to this txn's free list. 
*/ -static int mdbx_page_loose(MDBX_cursor *mc, MDBX_page *mp) { +static __must_check_result int mdbx_page_loose(MDBX_cursor *mc, MDBX_page *mp) { int loose = 0; const pgno_t pgno = mp->mp_pgno; MDBX_txn *txn = mc->mc_txn; @@ -4361,12 +4362,10 @@ int mdbx_txn_abort(MDBX_txn *txn) { } static __inline int mdbx_backlog_size(MDBX_txn *txn) { - int reclaimed_and_loose = - txn->mt_env->me_reclaimed_pglist - ? MDBX_PNL_SIZE(txn->mt_env->me_reclaimed_pglist) + - txn->mt_loose_count - : 0; - return reclaimed_and_loose; + int reclaimed = txn->mt_env->me_reclaimed_pglist + ? MDBX_PNL_SIZE(txn->mt_env->me_reclaimed_pglist) + : 0; + return reclaimed + txn->mt_loose_count; } static __inline int mdbx_backlog_extragap(MDBX_env *env) { @@ -7073,7 +7072,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { return rc; } -/****************************************************************************/ +/******************************************************************************/ /* Open and/or initialize the lock region for the environment. */ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, @@ -8303,7 +8302,8 @@ int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { return mdbx_cursor_set(&cx.outer, key, data, MDBX_SET, &exact); } -int mdbx_get2(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { +int mdbx_get_nearest(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *data) { DKBUF; mdbx_debug("===> get db %u key [%s]", dbi, DKEY(key)); @@ -8325,13 +8325,20 @@ int mdbx_get2(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - const int op = - (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) ? 
MDBX_GET_BOTH : MDBX_SET_KEY; + MDBX_val save_data = *data; int exact = 0; - rc = mdbx_cursor_set(&cx.outer, key, data, op, &exact); + rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_SET_RANGE, &exact); if (unlikely(rc != MDBX_SUCCESS)) return rc; + if (exact && (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) != 0) { + *data = save_data; + exact = 0; + rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_GET_BOTH_RANGE, &exact); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + return exact ? MDBX_SUCCESS : MDBX_RESULT_TRUE; } @@ -8828,7 +8835,6 @@ set1: /* Move the cursor to the first item in the database. */ static int mdbx_cursor_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { int rc; - MDBX_node *leaf; if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); @@ -8840,7 +8846,7 @@ static int mdbx_cursor_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { } mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); + MDBX_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); mc->mc_flags |= C_INITIALIZED; mc->mc_flags &= ~C_EOF; @@ -8872,7 +8878,6 @@ static int mdbx_cursor_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { /* Move the cursor to the last item in the database. 
*/ static int mdbx_cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { int rc; - MDBX_node *leaf; if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); @@ -8888,7 +8893,7 @@ static int mdbx_cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) { mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; mc->mc_flags |= C_INITIALIZED | C_EOF; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + MDBX_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { key->iov_len = mc->mc_db->md_xsize; @@ -14879,7 +14884,7 @@ bailout: * перед передачей в качестве аргументов для дальнейших модификаций, либо * отвергнуты на стадии проверки корректности аргументов. * - * Таким образом, функция позволяет как избавится от лишнего копирования, + * Таким образом, функция позволяет как избавиться от лишнего копирования, * так и выполнить более полную проверку аргументов. * * ВАЖНО: Передаваемый указатель должен указывать на начало данных. Только @@ -14899,7 +14904,7 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) { const MDBX_page *page = (const MDBX_page *)((uintptr_t)ptr & mask); /* LY: Тут не всё хорошо с абсолютной достоверностью результата, - * так как флажок P_DIRTY в LMDB может означать не совсем то, + * так как флажок P_DIRTY может означать не совсем то, * что было исходно задумано, детали см в логике кода mdbx_page_touch(). 
* * Более того, в режиме БЕЗ WRITEMAP грязные страницы выделяются через diff --git a/src/tools/mdbx_stat.c b/src/tools/mdbx_stat.c index cc5d5360..04c1fb93 100644 --- a/src/tools/mdbx_stat.c +++ b/src/tools/mdbx_stat.c @@ -61,7 +61,7 @@ static void usage(char *prog) { static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, mdbx_tid_t thread, uint64_t txnid, uint64_t lag, - size_t bytes_used, size_t bytes_retired) { + size_t bytes_used, size_t bytes_retained) { (void)ctx; if (num == 1) printf("Reader Table Status\n" @@ -73,7 +73,7 @@ static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, (int)sizeof(size_t) * 2, (size_t)thread); if (txnid) printf(" %20" PRIu64 " %10" PRIu64 " %12.1fM %12.1fM\n", txnid, lag, - bytes_used / 1048576.0, bytes_retired / 1048576.0); + bytes_used / 1048576.0, bytes_retained / 1048576.0); else printf(" %20s %10s %13s %13s\n", "-", "0", "0", "0"); diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 3de106ee..f606854a 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -16,7 +16,7 @@ case ${UNAME} in Linux) MAKE=make if [[ ! 
-v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then - for old_test_dir in $(ls -d /tmp/mdbx-test.[0-9]*); do + for old_test_dir in $(ls -d /dev/shm/mdbx-test.[0-9]*); do rm -rf $old_test_dir done TESTDB_DIR="/dev/shm/mdbx-test.$$" diff --git a/test/test.cc b/test/test.cc index 60f426c4..25f54131 100644 --- a/test/test.cc +++ b/test/test.cc @@ -76,8 +76,8 @@ const char *keygencase2str(const keygen_case keycase) { //----------------------------------------------------------------------------- -int testcase::oom_callback(MDBX_env *env, int pid, mdbx_tid_t tid, uint64_t txn, - unsigned gap, int retry) { +int testcase::oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid, + uint64_t txn, unsigned gap, int retry) { testcase *self = (testcase *)mdbx_env_get_userctx(env); @@ -510,7 +510,7 @@ void testcase::db_table_close(MDBX_dbi handle) { void testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued) { MDBX_val actual_value = expected_valued; - int rc = mdbx_get2(txn_guard.get(), handle, &key2check, &actual_value); + int rc = mdbx_get_nearest(txn_guard.get(), handle, &key2check, &actual_value); if (unlikely(rc != MDBX_SUCCESS)) failure_perror(step, rc); if (!is_samedata(&actual_value, &expected_valued)) diff --git a/test/test.h b/test/test.h index f627ce25..d901030f 100644 --- a/test/test.h +++ b/test/test.h @@ -98,8 +98,8 @@ protected: mutable chrono::time progress_timestamp; } last; - static int oom_callback(MDBX_env *env, int pid, mdbx_tid_t tid, uint64_t txn, - unsigned gap, int retry); + static int oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid, + uint64_t txn, unsigned gap, int retry); void db_prepare(); void db_open();