From 46b891508781c67ddf278ca73d9649d0a356e60e Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Tue, 29 Mar 2016 04:50:29 +0300 Subject: [PATCH] mdbx: rework attributes. Change-Id: Id9d436a54ac14ed82c593710b1d5939871c89d1a --- lmdb.h | 350 ++++++++++++++++++++++++++----------------------------- mdb.c | 314 +++++++++++++++++++++++++------------------------ mtest7.c | 31 ++--- mtest8.c | 46 +++----- 4 files changed, 353 insertions(+), 388 deletions(-) diff --git a/lmdb.h b/lmdb.h index bc562e2e..9020778c 100644 --- a/lmdb.h +++ b/lmdb.h @@ -352,8 +352,6 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NODUPDATA 0x20 /** For mdb_cursor_put: overwrite the current key/data pair */ #define MDB_CURRENT 0x40 -/** For mdb_cursor_put_attr: set attribute */ -#define MDB_SETATTR 0x80 /** For put: Just reserve space for data, don't copy it. Return a * pointer to the reserved space. */ @@ -407,7 +405,6 @@ typedef enum MDB_cursor_op { MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ - MDB_GET_ATTR, /**< Get attribute of specified node */ MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; @@ -1359,31 +1356,6 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx); */ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); - /** @brief Get items attribute from a database. - * - * This function retrieves key/data pairs attribute from the database. - * The attribute of the specified key-value pair is returned in - * uint64_t to which \b attrp refers. - * If the database supports duplicate keys (#MDB_DUPSORT) then both - * key and data parameters are required, otherwise data is ignored. 
- * - * @note Values returned from the database are valid only until a - * subsequent update operation, or the end of the transaction. - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] key The key to search for in the database - * @param[in] data The data for #MDB_DUPSORT databases - * @param[out] attrp The pointer to the result - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - * - */ -int mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, - uint64_t *attrp); - /** @brief Store items into a database. * * This function stores key/data pairs in the database. The default behavior @@ -1434,77 +1406,6 @@ int mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags); - /** @brief Store items and attributes into a database. - * - * This function stores key/data pairs in the database. The default behavior - * is to enter the new key/data pair, replacing any previously existing key - * if duplicates are disallowed, or adding a duplicate data item if - * duplicates are allowed (#MDB_DUPSORT). - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] key The key to store in the database - * @param[in] attr The attribute to store in the database - * @param[in,out] data The data to store - * @param[in] flags Special options for this operation. This parameter - * must be set to 0 or by bitwise OR'ing together one or more of the - * values described here. - * - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - * - */ -int mdb_put_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, - uint64_t attr, unsigned int flags); - - /** @brief Set items attribute from a database. 
- * - * This function stores key/data pairs attribute to the database. - * If the database supports duplicate keys (#MDB_DUPSORT) then both - * key and data parameters are required, otherwise data is ignored. - * - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] key The key to search for in the database - * @param[in] data The data for #MDB_DUPSORT databases - * @param[in] attr The attribute to be stored - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - * - */ -int mdb_set_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, - uint64_t attrp); - /** @brief Delete items from a database. * * This function removes key/data pairs from the database. @@ -1613,30 +1514,6 @@ MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor); int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op); - /** @brief Get items attribute from a database cursor. - * - * This function retrieves key/data pairs attribute from the database. - * The attribute of the specified key-value pair is returned in - * uint64_t to which \b attrp refers. - * If the database supports duplicate keys (#MDB_DUPSORT) then both - * key and data parameters are required, otherwise data is ignored. - * - * @note Values returned from the database are valid only until a - * subsequent update operation, or the end of the transaction. - * @param[in] mc A database cursor pointing at the node - * @param[in] key The key to search for in the database - * @param[in] data The data for #MDB_DUPSORT databases - * @param[out] attrp The pointer to the result - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - * - */ -int mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, - uint64_t *attrp); - /** @brief Store by cursor. * * This function stores key/data pairs into the database. 
@@ -1699,69 +1576,6 @@ int mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, unsigned flags); - /** @brief Store by cursor with attribute. - * - * This function stores key/data pairs into the database. - * The cursor is positioned at the new item, or on failure usually near it. - * @note Earlier documentation incorrectly said errors would leave the - * state of the cursor unchanged. - * @param[in] cursor A cursor handle returned by #mdb_cursor_open() - * @param[in] key The key operated on. - * @param[in] data The data operated on. - * @param[in] attr The attribute. - * @param[in] flags Options for this operation. This parameter - * must be set to 0 or one of the values described here. - * - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - * - */ -int mdb_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data, - uint64_t attr, unsigned int flags); - /** @brief Delete current key/data pair * * This function deletes the key/data pair to which the cursor refers. @@ -1916,6 +1730,170 @@ int mdbx_env_pgwalk(MDB_txn *txn, MDB_pgvisitor_func* visitor, void* ctx); char* mdb_dkey(MDB_val *key, char *buf); +/* attribute support functions for Nexenta ***********************************/ +#if MDBX_MODE_ENABLED + +typedef uint64_t mdbx_attr_t; + + /** @brief Store by cursor with attribute. + * + * This function stores key/data pairs into the database. + * The cursor is positioned at the new item, or on failure usually near it. + * @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT. + * @note Earlier documentation incorrectly said errors would leave the + * state of the cursor unchanged. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] key The key operated on. + * @param[in] data The data operated on. + * @param[in] attr The attribute. 
+ * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + * + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdbx_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + mdbx_attr_t attr, unsigned flags); + + /** @brief Store items and attributes into a database. + * + * This function stores key/data pairs in the database. The default behavior + * is to enter the new key/data pair, replacing any previously existing key + * if duplicates are disallowed. + * @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to store in the database + * @param[in] attr The attribute to store in the database + * @param[in,out] data The data to store + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdbx_put_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + mdbx_attr_t attr, unsigned flags); + + /** @brief Set items attribute from a database. + * + * This function stores key/data pairs attribute to the database. + * @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[in] data The data to be stored or NULL to save previous value. + * @param[in] attr The attribute to be stored + * @return A non-zero error value on failure and 0 on success. 
Some possible + * errors are: + * + */ +int mdbx_set_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + mdbx_attr_t attr); + + /** @brief Get items attribute from a database cursor. + * + * This function retrieves key/data pairs attribute from the database. + * The attribute of the specified key-value pair is returned in + * uint64_t to which \b attrptr refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data could be NULL. + * + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] mc A database cursor pointing at the node + * @param[in] key The key to search for in the database + * @param[in,out] data The data for #MDB_DUPSORT databases + * @param[out] attrptr The pointer to the result + * @param[in] op A cursor operation #MDB_cursor_op + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdbx_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, + mdbx_attr_t *attrptr, MDB_cursor_op op); + + /** @brief Get items attribute from a database. + * + * This function retrieves key/data pairs attribute from the database. + * The attribute of the specified key-value pair is returned in + * uint64_t to which \b attrptr refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data is ignored. + * + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. 
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[in] data The data for #MDB_DUPSORT databases + * @param[out] attrptr The pointer to the result + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdbx_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + mdbx_attr_t *attrptr); + +#endif /* MDBX_MODE_ENABLED */ + #ifdef __cplusplus } #endif diff --git a/mdb.c b/mdb.c index 158b71f1..bf7f7d9f 100644 --- a/mdb.c +++ b/mdb.c @@ -591,11 +591,6 @@ typedef struct MDB_node { /** @} */ unsigned short mn_flags; /**< @ref mdb_node */ unsigned short mn_ksize; /**< key size */ -#if BYTE_ORDER == LITTLE_ENDIAN - unsigned int mn_attr_lo, mn_attr_hi; /**< node attribute */ -#else - unsigned int mn_attr_hi, mn_attr_lo; -#endif char mn_data[1]; /**< key and data are appended here */ } MDB_node; @@ -640,13 +635,6 @@ typedef struct MDB_node { (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) /** The size of a key in a node */ #define NODEKSZ(node) ((node)->mn_ksize) - /** The attribute of the node as uint64_t */ -#define NODEATTR(node) \ - ((uint64_t)(node)->mn_attr_lo | ((uint64_t)(node)->mn_attr_hi << 32)) - /** Set node attribute */ -#define SETATTR(node,attr) do { \ - (node)->mn_attr_lo = (attr) & 0xffffffffUL; \ - (node)->mn_attr_hi = (attr) >> 32; } while (0) /** Copy a page number from src to dst */ #ifdef MISALIGNED_OK @@ -1059,7 +1047,7 @@ static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); #define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, - pgno_t newpgno, uint64_t newattr, unsigned nflags); + pgno_t newpgno, unsigned nflags); static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static int mdb_env_sync0(MDB_env *env, 
unsigned flags, MDB_meta *pending); @@ -1067,8 +1055,7 @@ static void mdb_env_close0(MDB_env *env); static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, - uint64_t attr, unsigned flags); + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags); static void mdb_node_del(MDB_cursor *mc, int ksize); static void mdb_node_shrink(MDB_page *mp, indx_t indx); static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft); @@ -1088,7 +1075,7 @@ static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, - int *exactp, uint64_t *attrp); + int *exactp); static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); @@ -1100,7 +1087,6 @@ static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force); static int mdb_drop0(MDB_cursor *mc, int subs); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead); -static int mdb_cursor_touch(MDB_cursor *mc); /** @cond */ static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int_ai, mdb_cmp_int_a2, mdb_cmp_int_ua; @@ -5779,36 +5765,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, return MDB_BAD_TXN; mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_set(&mc, key, data, MDB_SET, &exact, NULL); -} - -int -mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t *attrp) -{ - int exact = 0; - return mdb_cursor_set(mc, key, data, MDB_GET_ATTR, &exact, attrp); -} - -int -mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data, uint64_t *attrp) -{ - 
MDB_cursor mc; - MDB_xcursor mx; - - if (!key || !attrp || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) - return EINVAL; - /** TODO: implement support for DUPSORT? */ - if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - return ENOTSUP; - if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) && !data) - return EINVAL; - - if (txn->mt_flags & MDB_TXN_ERROR) - return MDB_BAD_TXN; - - mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_get_attr(&mc, key, data, attrp); + return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); } /** Find a sibling for a page. @@ -6033,7 +5990,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) /** Set the cursor on a specific data item. */ static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, - MDB_cursor_op op, int *exactp, uint64_t *attrp) + MDB_cursor_op op, int *exactp) { int rc; MDB_page *mp; @@ -6198,7 +6155,7 @@ set1: } else { ex2p = NULL; } - rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p, attrp); + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); if (unlikely(rc != MDB_SUCCESS)) return rc; } @@ -6222,9 +6179,6 @@ set1: } } - if (op == MDB_GET_ATTR) - *attrp = NODEATTR(leaf); - /* The key already matches in all other cases */ if (op == MDB_SET_RANGE || op == MDB_SET_KEY) MDB_GET_KEY(leaf, key); @@ -6387,7 +6341,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = EINVAL; } else { rc = mdb_cursor_set(mc, key, data, op, - op == MDB_SET_RANGE ? NULL : &exact, NULL); + op == MDB_SET_RANGE ? 
NULL : &exact); } break; case MDB_GET_MULTIPLE: @@ -6555,7 +6509,7 @@ mdb_cursor_touch(MDB_cursor *mc) #define MDB_NOSPILL 0x8000 int -mdb_cursor_put_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t attr, +mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned flags) { MDB_env *env; @@ -6657,20 +6611,11 @@ mdb_cursor_put_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t attr, } } } else { - rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact, NULL); + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); } if ((flags & MDB_NOOVERWRITE) && rc == 0) { mdb_debug("duplicate key [%s]", DKEY(key)); *data = d2; - if (F_ISSET(flags, MDB_SETATTR)) { - /* make sure all cursor pages are writable */ - rc2 = mdb_cursor_touch(mc); - if (rc2) - return rc2; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], - mc->mc_ki[mc->mc_top]); - SETATTR(leaf, attr); - } return MDB_KEYEXIST; } if (rc && unlikely(rc != MDB_NOTFOUND)) @@ -6947,8 +6892,6 @@ current: omp = np; } SETDSZ(leaf, data->mv_size); - if (F_ISSET(flags, MDB_SETATTR)) - SETATTR(leaf, attr); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = PAGEDATA(omp); else @@ -6963,8 +6906,6 @@ current: * also reuse this node if the new data is smaller, * but instead we opt to shrink the node in that case. */ - if (F_ISSET(flags, MDB_SETATTR)) - SETATTR(leaf, attr); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = olddata.mv_data; else if (!(mc->mc_flags & C_SUB)) @@ -6988,10 +6929,10 @@ new_sub: nflags &= ~MDB_APPEND; /* sub-page may need room to grow */ if (!insert_key) nflags |= MDB_SPLIT_REPLACE; - rc = mdb_page_split(mc, key, rdata, P_INVALID, attr, nflags); + rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); } else { /* There is room already in this leaf page. 
*/ - rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, attr, nflags); + rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ MDB_cursor *m2, *m3; @@ -7113,41 +7054,6 @@ bad_sub: return rc; } -int -mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, - unsigned int flags) -{ - flags &= ~MDB_SETATTR; - return mdb_cursor_put_attr(mc, key, data, 0, flags); -} - -int -mdb_set_attr(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data, uint64_t attr) -{ - MDB_cursor mc; - MDB_xcursor mx; - MDB_val dummy, *rdata = data ? data : &dummy; - int rc, exact = 1; - - if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) - return EINVAL; - /** TODO: implement support for DUPSORT? */ - if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - return ENOTSUP; - if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) && !data) - return EINVAL; - - if (txn->mt_flags & MDB_TXN_ERROR) - return MDB_BAD_TXN; - - mdb_cursor_init(&mc, txn, dbi, &mx); - if ((rc = mdb_cursor_set(&mc, key, rdata, MDB_SET, &exact, NULL)) != MDB_SUCCESS) - return rc; - return mdb_cursor_put_attr(&mc, key, rdata, attr, - MDB_CURRENT|MDB_SETATTR); -} - int mdb_cursor_del(MDB_cursor *mc, unsigned flags) { @@ -7363,7 +7269,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key) */ static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, uint64_t attr, unsigned flags) + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags) { unsigned i; size_t node_size = NODESIZE; @@ -7445,10 +7351,9 @@ update: node = NODEPTR(mp, indx); node->mn_ksize = (key == NULL) ? 
0 : key->mv_size; node->mn_flags = flags; - if (IS_LEAF(mp)) { + if (IS_LEAF(mp)) SETDSZ(node,data->mv_size); - SETATTR(node,attr); - } else + else SETPGNO(node,pgno); if (key) @@ -7906,7 +7811,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) mdb_debug("Not enough room, delta = %d, splitting...", delta); pgno = NODEPGNO(node); mdb_node_del(mc, 0); - return mdb_page_split(mc, key, NULL, pgno, 0, MDB_SPLIT_REPLACE); + return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); } numkeys = NUMKEYS(mp); @@ -7959,7 +7864,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) { MDB_node *srcnode; MDB_val key, data; - uint64_t attr = 0UL; pgno_t srcpg; MDB_cursor mn; int rc; @@ -8007,7 +7911,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); - attr = NODEATTR(srcnode); } mn.mc_xcursor = NULL; if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { @@ -8044,8 +7947,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) /* Add the node to the destination page. 
*/ - rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, - attr, flags); + rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); if (unlikely(rc != MDB_SUCCESS)) return rc; @@ -8224,7 +8126,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) key.mv_size = csrc->mc_db->md_xsize; key.mv_data = PAGEDATA(psrc); for (i = 0; i < NUMKEYS(psrc); i++, j++) { - rc = mdb_node_add(cdst, j, &key, NULL, 0, 0, 0); + rc = mdb_node_add(cdst, j, &key, NULL, 0, 0); if (unlikely(rc != MDB_SUCCESS)) return rc; key.mv_data = (char *)key.mv_data + key.mv_size; @@ -8256,8 +8158,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); - rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), - NODEATTR(srcnode), srcnode->mn_flags); + rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags); if (unlikely(rc != MDB_SUCCESS)) return rc; } @@ -8672,7 +8573,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, xdata = NULL; flags |= MDB_NODUPDATA; } - rc = mdb_cursor_set(&mc, key, xdata, op, &exact, NULL); + rc = mdb_cursor_set(&mc, key, xdata, op, &exact); if (likely(rc == 0)) { /* let mdb_page_split know about this cursor if needed: * delete will trigger a rebalance; if it needs to move @@ -8698,13 +8599,12 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, * @param[in] newkey The key for the newly inserted node. * @param[in] newdata The data for the newly inserted node. * @param[in] newpgno The page number, if the new node is a branch node. - * @param[in] newattr The node attr for the newly inserted node. * @param[in] nflags The #NODE_ADD_FLAGS for the new node. * @return 0 on success, non-zero on failure. 
*/ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, - uint64_t newattr, unsigned nflags) + unsigned nflags) { unsigned flags; int rc = MDB_SUCCESS, new_root = 0, did_split = 0; @@ -8754,7 +8654,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno new_root = mc->mc_db->md_depth++; /* Add left (implicit) pointer. */ - if (unlikely((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0, 0)) != MDB_SUCCESS)) { + if (unlikely((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS)) { /* undo the pre-push */ mc->mc_pg[0] = mc->mc_pg[1]; mc->mc_ki[0] = mc->mc_ki[1]; @@ -8918,8 +8818,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mn.mc_top--; did_split = 1; /* We want other splits to find mn when doing fixups */ - WITH_CURSOR_TRACKING(mn, - rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0, 0)); + WITH_CURSOR_TRACKING(mn, rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0)); if (unlikely(rc != MDB_SUCCESS)) goto done; @@ -8947,7 +8846,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno } } else { mn.mc_top--; - rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0, 0); + rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0); mn.mc_top++; } if (unlikely(rc != MDB_SUCCESS)) @@ -8955,14 +8854,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if (nflags & MDB_APPEND) { mc->mc_pg[mc->mc_top] = rp; mc->mc_ki[mc->mc_top] = 0; - rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, newattr, nflags); + rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags); if (rc) goto done; for (i=0; i<mc->mc_top; i++) mc->mc_ki[i] = mn.mc_ki[i]; } else if (!IS_LEAF2(mp)) { /* Move nodes */ - uint64_t rattr; mc->mc_pg[mc->mc_top] = rp; i = split_indx; j = 0; @@ -8970,7 +8868,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if (i
== newindx) { rkey.mv_data = newkey->mv_data; rkey.mv_size = newkey->mv_size; - rattr = newattr; if (IS_LEAF(mp)) { rdata = newdata; } else @@ -8982,7 +8879,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); rkey.mv_data = NODEKEY(node); rkey.mv_size = node->mn_ksize; - rattr = NODEATTR(node); if (IS_LEAF(mp)) { xdata.mv_data = NODEDATA(node); xdata.mv_size = NODEDSZ(node); @@ -8997,7 +8893,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno rkey.mv_size = 0; } - rc = mdb_node_add(mc, j, &rkey, rdata, pgno, rattr, flags); + rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags); if (rc) goto done; if (i == nkeys) { @@ -9153,27 +9049,6 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, return rc; } -int -mdb_put_attr(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data, uint64_t attr, unsigned int flags) -{ - MDB_cursor mc; - MDB_xcursor mx; - - if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) - return EINVAL; - - /** TODO: implement support for DUPSORT? 
*/ - if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - return ENOTSUP; - - if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags) - return EINVAL; - - mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_put_attr(&mc, key, data, attr, flags | MDB_SETATTR); -} - #ifndef MDB_WBUF #define MDB_WBUF (1024*1024) #endif @@ -9974,7 +9849,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned flags, MDB_dbi *dbi) key.mv_size = len; key.mv_data = (void *)name; mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); - rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact, NULL); + rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact); if (likely(rc == MDB_SUCCESS)) { /* make sure this is actually a DB */ MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); @@ -10709,6 +10584,143 @@ mdbx_env_pgwalk(MDB_txn *txn, MDB_pgvisitor_func* visitor, void* user) return rc; } +/* attribute support functions for Nexenta ***********************************/ + +static __inline int +mdbx_attr_peek(MDB_val *data, mdbx_attr_t *attrptr) +{ + if (unlikely(data->mv_size < sizeof(mdbx_attr_t))) + return MDB_INCOMPATIBLE; + + if (likely(attrptr != NULL)) + memcpy(attrptr, data->mv_data, sizeof(mdbx_attr_t)); /* byte copy: no alignment assumption on the stored prefix */ + data->mv_size -= sizeof(mdbx_attr_t); + data->mv_data = likely(data->mv_size > 0) + ? ((mdbx_attr_t*) data->mv_data) + 1 : NULL; + + return MDB_SUCCESS; +} + +static __inline int +mdbx_attr_poke(MDB_val *reserved, MDB_val *data, mdbx_attr_t attr, unsigned flags) +{ + mdbx_attr_t *space = reserved->mv_data; + if (flags & MDB_RESERVE) { + if (likely(data != NULL)) { + data->mv_data = data->mv_size ?
space + 1 : NULL; + } + } else { + memcpy(space, &attr, sizeof(attr)); /* attribute prefix; byte copy avoids an alignment assumption */ + if (likely(data != NULL)) { + memmove(space + 1, data->mv_data, data->mv_size); /* source may be the record being rewritten in place */ + } + } + + return MDB_SUCCESS; +} + +int +mdbx_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, + mdbx_attr_t *attrptr, MDB_cursor_op op) +{ + int rc = mdbx_cursor_get(mc, key, data, op); + if (unlikely(rc != MDB_SUCCESS)) + return rc; + + return mdbx_attr_peek(data, attrptr); +} + +int +mdbx_get_attr(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, mdbx_attr_t *attrptr) +{ + int rc = mdbx_get(txn, dbi, key, data); + if (unlikely(rc != MDB_SUCCESS)) + return rc; + + return mdbx_attr_peek(data, attrptr); +} + +int +mdbx_put_attr(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, mdbx_attr_t attr, unsigned flags) +{ + MDB_val reserve = { + .mv_data = NULL, + .mv_size = (data ? data->mv_size : 0) + sizeof(mdbx_attr_t) + }; + + int rc = mdbx_put(txn, dbi, key, &reserve, flags | MDB_RESERVE); + if (unlikely(rc != MDB_SUCCESS)) + return rc; + + return mdbx_attr_poke(&reserve, data, attr, flags); +} + +int mdbx_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + mdbx_attr_t attr, unsigned flags) +{ + MDB_val reserve = { + .mv_data = NULL, + .mv_size = (data ? data->mv_size : 0) + sizeof(mdbx_attr_t) + }; + + int rc = mdbx_cursor_put(cursor, key, &reserve, flags | MDB_RESERVE); + if (unlikely(rc != MDB_SUCCESS)) + return rc; + + return mdbx_attr_poke(&reserve, data, attr, flags); +} + +int mdbx_set_attr(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, mdbx_attr_t attr) +{ + MDB_cursor mc; + MDB_xcursor mx; + MDB_val old_data; + mdbx_attr_t old_attr; + int rc; + + if (unlikely(!key || !txn)) + return EINVAL; + + if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDB_VERSION_MISMATCH; + + if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) + return EINVAL; + + if (unlikely(txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))) + return (txn->mt_flags & MDB_TXN_RDONLY) ?
EACCES : MDB_BAD_TXN; + + mdb_cursor_init(&mc, txn, dbi, &mx); + rc = mdb_cursor_set(&mc, key, &old_data, MDB_SET, NULL); + if (unlikely(rc != MDB_SUCCESS)) { + if (rc == MDB_NOTFOUND && data) { + mc.mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = &mc; + rc = mdbx_cursor_put_attr(&mc, key, data, attr, 0); + txn->mt_cursors[dbi] = mc.mc_next; + } + return rc; + } + + rc = mdbx_attr_peek(&old_data, &old_attr); + if (unlikely(rc != MDB_SUCCESS)) + return rc; + + if (old_attr == attr && (!data || + (data->mv_size == old_data.mv_size + && memcmp(data->mv_data, old_data.mv_data, old_data.mv_size) == 0))) + return MDB_SUCCESS; /* attribute and payload unchanged: skip the write */ + + mc.mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = &mc; + rc = mdbx_cursor_put_attr(&mc, key, data ? data : &old_data, attr, MDB_CURRENT); + txn->mt_cursors[dbi] = mc.mc_next; + return rc; +} + #endif /* MDBX_MODE_ENABLED */ /** @} */ diff --git a/mtest7.c b/mtest7.c index 34e925e1..0e15bc9e 100644 --- a/mtest7.c +++ b/mtest7.c @@ -27,6 +27,10 @@ char dkbuf[1024]; +#ifndef DBPATH +# define DBPATH "./testdb/data.mdb" +#endif + int main(int argc,char * argv[]) { int i = 0, j = 0, rc; @@ -51,10 +55,11 @@ int main(int argc,char * argv[]) values = (int *)malloc(count*sizeof(int)); timestamps = (uint64_t *)calloc(count,sizeof(uint64_t)); + unlink(DBPATH); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 104857600)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest7.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi)); @@ -70,33 +75,17 @@ int main(int argc,char * argv[]) values[i] = rand()%16383 ^ (timestamps[i] & 0xffff); key.mv_data = values + i; sprintf(sval, "%03x %d foo bar", values[i], values[i]); - rc = mdb_set_attr(txn, dbi, &key, &data, timestamps[i]); - if (rc && rc != MDB_NOTFOUND) { - printf("mdb_set_attr returned %d\n", rc); - break; - } - if (i % 2) { - if (RES(MDB_KEYEXIST, mdb_put(txn, dbi,
&key, &data, - MDB_NODUPDATA))) { - j++; - continue; - } - E(mdb_set_attr(txn, dbi, &key, &data, timestamps[i])); - } else - E(mdb_put_attr(txn, dbi, &key, &data, timestamps[i], - MDB_NODUPDATA)); + E(mdbx_put_attr(txn, dbi, &key, &data, timestamps[i], MDB_NODUPDATA)); } if (j) printf("%d duplicates skipped\n", j); E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 10485760)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest7.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi)); @@ -105,7 +94,7 @@ int main(int argc,char * argv[]) continue; key.mv_data = values + i; sprintf(sval, "%03x %d foo bar", values[i], values[i]); - E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp)); if (timestamps[i] != timestamp) { for (j = 0; j < count; ++j) { if (j != i && values[i] == values[j] && @@ -129,8 +118,6 @@ int main(int argc,char * argv[]) E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); return 0; diff --git a/mtest8.c b/mtest8.c index 598e4c06..f5895628 100644 --- a/mtest8.c +++ b/mtest8.c @@ -27,12 +27,16 @@ char dkbuf[1024]; +#ifndef DBPATH +# define DBPATH "./testdb/data.mdb" +#endif + int main(int argc,char * argv[]) { int i = 0, rc; MDB_env *env; MDB_dbi dbi; - MDB_val key, data, data1; + MDB_val key, data; MDB_txn *txn; MDB_stat mst; int count; @@ -61,10 +65,11 @@ int main(int argc,char * argv[]) for (i = 2; i < count; ++i) values[i] = values[i - 1] + values[i - 2]; + unlink(DBPATH); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 104857600)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_dbi_open(txn, "id8",
MDB_CREATE|MDB_INTEGERKEY, &dbi)); @@ -76,82 +81,65 @@ int main(int argc,char * argv[]) snprintf(sval, 4000, "Value %d\n", values[i]); snprintf(sval + 4000, 4000, "Value %d\n", values[i]); key.mv_data = values + i; - E(mdb_put_attr(txn, dbi, &key, &data, timestamps[i], - MDB_NODUPDATA)); + E(mdbx_put_attr(txn, dbi, &key, &data, timestamps[i], MDB_NOOVERWRITE)); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 10485760)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); - E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi)); for (i = 0; i < count; ++i) { key.mv_data = values + i; - E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp)); E(timestamps[i] != timestamp); - - E(mdb_get(txn, dbi, &key, &data1)); - E(data.mv_size != data1.mv_size); - E(memcmp(data.mv_data, data1.mv_data, data.mv_size)); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 104857600)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); - E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi)); for (i = 0; i < count; ++i) { (void)gettimeofday(&tv, NULL); timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec; key.mv_data = values + i; - E(mdb_set_attr(txn, dbi, &key, NULL, timestamps[i])); + E(mdbx_set_attr(txn, dbi, &key, NULL, timestamps[i])); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env,
10485760)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); - E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi)); for (i = 0; i < count; ++i) { key.mv_data = values + i; - E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp)); E(timestamps[i] != timestamp); - - E(mdb_get(txn, dbi, &key, &data1)); - E(data.mv_size != data1.mv_size); - E(memcmp(data.mv_data, data1.mv_data, data.mv_size)); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); return 0;