From ce06c8df9e2db8bbc84065ab1c211e51344540a8 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Mon, 28 Mar 2016 04:04:18 +0300 Subject: [PATCH 1/2] mdbx: 'attributes' support for Nexenta. Change-Id: Ib7a33d6a489d3ef6cfe67349c1ae8946a6a0548a --- Makefile | 10 ++- lmdb.h | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ mdb.c | 177 ++++++++++++++++++++++++++++++++++++++++++++-------- mtest7.c | 137 ++++++++++++++++++++++++++++++++++++++++ mtest8.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 641 insertions(+), 27 deletions(-) create mode 100644 mtest7.c create mode 100644 mtest8.c diff --git a/Makefile b/Makefile index 25df09e3..53bdf54d 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ IHDRS := lmdb.h mdbx.h ILIBS := libmdbx.a libmdbx.so IPROGS := mdbx_stat mdbx_copy mdbx_dump mdbx_load mdbx_chk IDOCS := mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1 -PROGS := $(IPROGS) mtest0 mtest1 mtest2 mtest3 mtest4 mtest5 mtest6 wbench +PROGS := $(IPROGS) mtest0 mtest1 mtest2 mtest3 mtest4 mtest5 mtest6 mtest7 mtest8 wbench PROGS += yota_test1 yota_test2 SRC_LMDB := mdb.c midl.c lmdb.h midl.h reopen.h barriers.h @@ -68,6 +68,8 @@ check: tests && echo "*** LMDB-TEST-4" && ./mtest4 && ./mdbx_chk -v testdb \ && echo "*** LMDB-TEST-5" && ./mtest5 && ./mdbx_chk -v testdb \ && echo "*** LMDB-TEST-6" && ./mtest6 && ./mdbx_chk -v testdb \ + && echo "*** LMDB-TEST-7" && ./mtest7 && ./mdbx_chk -v testdb \ + && echo "*** LMDB-TEST-8" && ./mtest8 && ./mdbx_chk -v testdb \ && echo "*** LMDB-TESTs - all done" libmdbx.a: mdbx.o @@ -118,6 +120,12 @@ mtest5: mtest5.o mdbx.o mtest6: mtest6.o mdbx.o $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ +mtest7: mtest7.o mdbx.o + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ + +mtest8: mtest8.o mdbx.o + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ + yota_test1: yota_test1.o mdbx.o $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ diff --git a/lmdb.h b/lmdb.h index da797d2c..bc562e2e 100644 --- a/lmdb.h +++ b/lmdb.h @@ -352,6 +352,8 @@ typedef void 
(MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NODUPDATA 0x20 /** For mdb_cursor_put: overwrite the current key/data pair */ #define MDB_CURRENT 0x40 +/** For mdb_cursor_put_attr: set attribute */ +#define MDB_SETATTR 0x80 /** For put: Just reserve space for data, don't copy it. Return a * pointer to the reserved space. */ @@ -405,6 +407,7 @@ typedef enum MDB_cursor_op { MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ + MDB_GET_ATTR, /**< Get attribute of specified node */ MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; @@ -1356,6 +1359,31 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx); */ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + /** @brief Get items attribute from a database. + * + * This function retrieves key/data pairs attribute from the database. + * The attribute of the specified key-value pair is returned in + * uint64_t to which \b attrp refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data is ignored. + * + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[in] data The data for #MDB_DUPSORT databases + * @param[out] attrp The pointer to the result + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + uint64_t *attrp); + /** @brief Store items into a database. 
* * This function stores key/data pairs in the database. The default behavior @@ -1406,6 +1434,77 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags); + /** @brief Store items and attributes into a database. + * + * This function stores key/data pairs in the database. The default behavior + * is to enter the new key/data pair, replacing any previously existing key + * if duplicates are disallowed, or adding a duplicate data item if + * duplicates are allowed (#MDB_DUPSORT). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to store in the database + * @param[in] attr The attribute to store in the database + * @param[in,out] data The data to store + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_put_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + uint64_t attr, unsigned int flags); + + /** @brief Set items attribute from a database. + * + * This function stores key/data pairs attribute to the database. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data is ignored. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[in] data The data for #MDB_DUPSORT databases + * @param[in] attr The attribute to be stored + * @return A non-zero error value on failure and 0 on success. 
Some possible + * errors are: + * + */ +int mdb_set_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + uint64_t attrp); + /** @brief Delete items from a database. * * This function removes key/data pairs from the database. @@ -1514,6 +1613,30 @@ MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor); int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op); + /** @brief Get items attribute from a database cursor. + * + * This function retrieves key/data pairs attribute from the database. + * The attribute of the specified key-value pair is returned in + * uint64_t to which \b attrp refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data is ignored. + * + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] mc A database cursor pointing at the node + * @param[in] key The key to search for in the database + * @param[in] data The data for #MDB_DUPSORT databases + * @param[out] attrp The pointer to the result + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, + uint64_t *attrp); + /** @brief Store by cursor. * * This function stores key/data pairs into the database. @@ -1576,6 +1699,69 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, unsigned flags); + /** @brief Store by cursor with attribute. + * + * This function stores key/data pairs into the database. + * The cursor is positioned at the new item, or on failure usually near it. + * @note Earlier documentation incorrectly said errors would leave the + * state of the cursor unchanged. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] key The key operated on. 
+ * @param[in] data The data operated on. + * @param[in] attr The attribute. + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + * + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + uint64_t attr, unsigned int flags); + /** @brief Delete current key/data pair * * This function deletes the key/data pair to which the cursor refers. diff --git a/mdb.c b/mdb.c index dbdcc196..158b71f1 100644 --- a/mdb.c +++ b/mdb.c @@ -591,6 +591,11 @@ typedef struct MDB_node { /** @} */ unsigned short mn_flags; /**< @ref mdb_node */ unsigned short mn_ksize; /**< key size */ +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned int mn_attr_lo, mn_attr_hi; /**< node attribute */ +#else + unsigned int mn_attr_hi, mn_attr_lo; +#endif char mn_data[1]; /**< key and data are appended here */ } MDB_node; @@ -635,6 +640,13 @@ typedef struct MDB_node { (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) /** The size of a key in a node */ #define NODEKSZ(node) ((node)->mn_ksize) + /** The attribute of the node as uint64_t */ +#define NODEATTR(node) \ + ((uint64_t)(node)->mn_attr_lo | ((uint64_t)(node)->mn_attr_hi << 32)) + /** Set node attribute */ +#define SETATTR(node,attr) do { \ + (node)->mn_attr_lo = (attr) & 0xffffffffUL; \ + (node)->mn_attr_hi = (attr) >> 32; } while (0) /** Copy a page number from src to dst */ #ifdef MISALIGNED_OK @@ -1047,7 +1059,7 @@ static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); #define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, - pgno_t newpgno, unsigned nflags); + pgno_t newpgno, uint64_t newattr, unsigned nflags); static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static int mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending); @@ 
-1055,7 +1067,8 @@ static void mdb_env_close0(MDB_env *env); static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags); + MDB_val *key, MDB_val *data, pgno_t pgno, + uint64_t attr, unsigned flags); static void mdb_node_del(MDB_cursor *mc, int ksize); static void mdb_node_shrink(MDB_page *mp, indx_t indx); static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft); @@ -1075,7 +1088,7 @@ static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, - int *exactp); + int *exactp, uint64_t *attrp); static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); @@ -1087,6 +1100,7 @@ static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force); static int mdb_drop0(MDB_cursor *mc, int subs); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead); +static int mdb_cursor_touch(MDB_cursor *mc); /** @cond */ static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int_ai, mdb_cmp_int_a2, mdb_cmp_int_ua; @@ -5765,7 +5779,36 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, return MDB_BAD_TXN; mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); + return mdb_cursor_set(&mc, key, data, MDB_SET, &exact, NULL); +} + +int +mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t *attrp) +{ + int exact = 0; + return mdb_cursor_set(mc, key, data, MDB_GET_ATTR, &exact, attrp); +} + +int +mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, 
uint64_t *attrp) +{ + MDB_cursor mc; + MDB_xcursor mx; + + if (!key || !attrp || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) + return EINVAL; + /** TODO: implement support for DUPSORT? */ + if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + return ENOTSUP; + if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) && !data) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + mdb_cursor_init(&mc, txn, dbi, &mx); + return mdb_cursor_get_attr(&mc, key, data, attrp); } /** Find a sibling for a page. @@ -5990,7 +6033,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) /** Set the cursor on a specific data item. */ static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, - MDB_cursor_op op, int *exactp) + MDB_cursor_op op, int *exactp, uint64_t *attrp) { int rc; MDB_page *mp; @@ -6155,7 +6198,7 @@ set1: } else { ex2p = NULL; } - rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p, attrp); if (unlikely(rc != MDB_SUCCESS)) return rc; } @@ -6179,6 +6222,9 @@ set1: } } + if (op == MDB_GET_ATTR) + *attrp = NODEATTR(leaf); + /* The key already matches in all other cases */ if (op == MDB_SET_RANGE || op == MDB_SET_KEY) MDB_GET_KEY(leaf, key); @@ -6341,7 +6387,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = EINVAL; } else { rc = mdb_cursor_set(mc, key, data, op, - op == MDB_SET_RANGE ? NULL : &exact); + op == MDB_SET_RANGE ? 
NULL : &exact, NULL); } break; case MDB_GET_MULTIPLE: @@ -6509,7 +6555,7 @@ mdb_cursor_touch(MDB_cursor *mc) #define MDB_NOSPILL 0x8000 int -mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, +mdb_cursor_put_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t attr, unsigned flags) { MDB_env *env; @@ -6611,11 +6657,20 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, } } } else { - rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact, NULL); } if ((flags & MDB_NOOVERWRITE) && rc == 0) { mdb_debug("duplicate key [%s]", DKEY(key)); *data = d2; + if (F_ISSET(flags, MDB_SETATTR)) { + /* make sure all cursor pages are writable */ + rc2 = mdb_cursor_touch(mc); + if (rc2) + return rc2; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], + mc->mc_ki[mc->mc_top]); + SETATTR(leaf, attr); + } return MDB_KEYEXIST; } if (rc && unlikely(rc != MDB_NOTFOUND)) @@ -6892,6 +6947,8 @@ current: omp = np; } SETDSZ(leaf, data->mv_size); + if (F_ISSET(flags, MDB_SETATTR)) + SETATTR(leaf, attr); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = PAGEDATA(omp); else @@ -6906,6 +6963,8 @@ current: * also reuse this node if the new data is smaller, * but instead we opt to shrink the node in that case. */ + if (F_ISSET(flags, MDB_SETATTR)) + SETATTR(leaf, attr); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = olddata.mv_data; else if (!(mc->mc_flags & C_SUB)) @@ -6929,10 +6988,10 @@ new_sub: nflags &= ~MDB_APPEND; /* sub-page may need room to grow */ if (!insert_key) nflags |= MDB_SPLIT_REPLACE; - rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); + rc = mdb_page_split(mc, key, rdata, P_INVALID, attr, nflags); } else { /* There is room already in this leaf page. 
*/ - rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); + rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, attr, nflags); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ MDB_cursor *m2, *m3; @@ -7054,6 +7113,41 @@ bad_sub: return rc; } +int +mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, + unsigned int flags) +{ + flags &= ~MDB_SETATTR; + return mdb_cursor_put_attr(mc, key, data, 0, flags); +} + +int +mdb_set_attr(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, uint64_t attr) +{ + MDB_cursor mc; + MDB_xcursor mx; + MDB_val dummy, *rdata = data ? data : &dummy; + int rc, exact = 1; + + if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) + return EINVAL; + /** TODO: implement support for DUPSORT? */ + if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + return ENOTSUP; + if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) && !data) + return EINVAL; + + if (txn->mt_flags & MDB_TXN_ERROR) + return MDB_BAD_TXN; + + mdb_cursor_init(&mc, txn, dbi, &mx); + if ((rc = mdb_cursor_set(&mc, key, rdata, MDB_SET, &exact, NULL)) != MDB_SUCCESS) + return rc; + return mdb_cursor_put_attr(&mc, key, rdata, attr, + MDB_CURRENT|MDB_SETATTR); +} + int mdb_cursor_del(MDB_cursor *mc, unsigned flags) { @@ -7269,7 +7363,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key) */ static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags) + MDB_val *key, MDB_val *data, pgno_t pgno, uint64_t attr, unsigned flags) { unsigned i; size_t node_size = NODESIZE; @@ -7351,9 +7445,10 @@ update: node = NODEPTR(mp, indx); node->mn_ksize = (key == NULL) ? 
0 : key->mv_size; node->mn_flags = flags; - if (IS_LEAF(mp)) + if (IS_LEAF(mp)) { SETDSZ(node,data->mv_size); - else + SETATTR(node,attr); + } else SETPGNO(node,pgno); if (key) @@ -7811,7 +7906,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) mdb_debug("Not enough room, delta = %d, splitting...", delta); pgno = NODEPGNO(node); mdb_node_del(mc, 0); - return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); + return mdb_page_split(mc, key, NULL, pgno, 0, MDB_SPLIT_REPLACE); } numkeys = NUMKEYS(mp); @@ -7864,6 +7959,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) { MDB_node *srcnode; MDB_val key, data; + uint64_t attr = 0UL; pgno_t srcpg; MDB_cursor mn; int rc; @@ -7911,6 +8007,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); + attr = NODEATTR(srcnode); } mn.mc_xcursor = NULL; if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { @@ -7947,7 +8044,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) /* Add the node to the destination page. 
*/ - rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); + rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, + attr, flags); if (unlikely(rc != MDB_SUCCESS)) return rc; @@ -8126,7 +8224,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) key.mv_size = csrc->mc_db->md_xsize; key.mv_data = PAGEDATA(psrc); for (i = 0; i < NUMKEYS(psrc); i++, j++) { - rc = mdb_node_add(cdst, j, &key, NULL, 0, 0); + rc = mdb_node_add(cdst, j, &key, NULL, 0, 0, 0); if (unlikely(rc != MDB_SUCCESS)) return rc; key.mv_data = (char *)key.mv_data + key.mv_size; @@ -8158,7 +8256,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); - rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags); + rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), + NODEATTR(srcnode), srcnode->mn_flags); if (unlikely(rc != MDB_SUCCESS)) return rc; } @@ -8573,7 +8672,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, xdata = NULL; flags |= MDB_NODUPDATA; } - rc = mdb_cursor_set(&mc, key, xdata, op, &exact); + rc = mdb_cursor_set(&mc, key, xdata, op, &exact, NULL); if (likely(rc == 0)) { /* let mdb_page_split know about this cursor if needed: * delete will trigger a rebalance; if it needs to move @@ -8599,12 +8698,13 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, * @param[in] newkey The key for the newly inserted node. * @param[in] newdata The data for the newly inserted node. * @param[in] newpgno The page number, if the new node is a branch node. + * @param[in] newattr The node attr for the newly inserted node. * @param[in] nflags The #NODE_ADD_FLAGS for the new node. * @return 0 on success, non-zero on failure. 
*/ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, - unsigned nflags) + uint64_t newattr, unsigned nflags) { unsigned flags; int rc = MDB_SUCCESS, new_root = 0, did_split = 0; @@ -8654,7 +8754,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno new_root = mc->mc_db->md_depth++; /* Add left (implicit) pointer. */ - if (unlikely((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS)) { + if (unlikely((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0, 0)) != MDB_SUCCESS)) { /* undo the pre-push */ mc->mc_pg[0] = mc->mc_pg[1]; mc->mc_ki[0] = mc->mc_ki[1]; @@ -8818,7 +8918,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mn.mc_top--; did_split = 1; /* We want other splits to find mn when doing fixups */ - WITH_CURSOR_TRACKING(mn, rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0)); + WITH_CURSOR_TRACKING(mn, + rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0, 0)); if (unlikely(rc != MDB_SUCCESS)) goto done; @@ -8846,7 +8947,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno } } else { mn.mc_top--; - rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0); + rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0, 0); mn.mc_top++; } if (unlikely(rc != MDB_SUCCESS)) @@ -8854,13 +8955,14 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if (nflags & MDB_APPEND) { mc->mc_pg[mc->mc_top] = rp; mc->mc_ki[mc->mc_top] = 0; - rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags); + rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, newattr, nflags); if (rc) goto done; for (i=0; i<mc->mc_top; i++) mc->mc_ki[i] = mn.mc_ki[i]; } else if (!IS_LEAF2(mp)) { /* Move nodes */ + uint64_t rattr; mc->mc_pg[mc->mc_top] = rp; i = split_indx; j = 0; @@ -8868,6 +8970,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if (i
== newindx) { rkey.mv_data = newkey->mv_data; rkey.mv_size = newkey->mv_size; + rattr = newattr; if (IS_LEAF(mp)) { rdata = newdata; } else @@ -8879,6 +8982,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); rkey.mv_data = NODEKEY(node); rkey.mv_size = node->mn_ksize; + rattr = NODEATTR(node); if (IS_LEAF(mp)) { xdata.mv_data = NODEDATA(node); xdata.mv_size = NODEDSZ(node); @@ -8893,7 +8997,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno rkey.mv_size = 0; } - rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags); + rc = mdb_node_add(mc, j, &rkey, rdata, pgno, rattr, flags); if (rc) goto done; if (i == nkeys) { @@ -9049,6 +9153,27 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, return rc; } +int +mdb_put_attr(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, uint64_t attr, unsigned int flags) +{ + MDB_cursor mc; + MDB_xcursor mx; + + if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) + return EINVAL; + + /** TODO: implement support for DUPSORT? 
*/ + if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + return ENOTSUP; + + if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags) + return EINVAL; + + mdb_cursor_init(&mc, txn, dbi, &mx); + return mdb_cursor_put_attr(&mc, key, data, attr, flags | MDB_SETATTR); +} + #ifndef MDB_WBUF #define MDB_WBUF (1024*1024) #endif @@ -9849,7 +9974,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned flags, MDB_dbi *dbi) key.mv_size = len; key.mv_data = (void *)name; mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); - rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact); + rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact, NULL); if (likely(rc == MDB_SUCCESS)) { /* make sure this is actually a DB */ MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); diff --git a/mtest7.c b/mtest7.c new file mode 100644 index 00000000..34e925e1 --- /dev/null +++ b/mtest7.c @@ -0,0 +1,137 @@ +/* mtest7.c - memory-mapped database tester/toy */ +/* + * Copyright 2015 Ilya Usvyatsky, Nexenta Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +/* Tests for DB attributes */ +#include +#include +#include +#include +#include +#include "mdbx.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? 
(void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +char dkbuf[1024]; + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + int count; + int *values; + char sval[32]; + uint64_t *timestamps, timestamp; + struct timeval tv; + int env_opt = MDB_NOMEMINIT | MDB_NOSYNC | MDB_NOSUBDIR | MDB_NORDAHEAD; + + srand(time(NULL)); + + memset(sval, 0, sizeof(sval)); + count = (rand()%384) + 64; + if (argc > 1) + count = atoi(argv[1]); + values = (int *)malloc(count*sizeof(int)); + timestamps = (uint64_t *)calloc(count,sizeof(uint64_t)); + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 104857600)); + E(mdb_env_set_maxdbs(env, 8)); + E(mdb_env_open(env, "./mtest7.db", env_opt, 0664)); + + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + + key.mv_size = sizeof(int); + data.mv_size = sizeof(sval); + data.mv_data = sval; + + printf("Adding %d values\n", count); + for (i=0;i= count) { + printf("Timestamp mismatch " + "%d %03x %d %lu != %lu\n", + i, values[i], values[i], timestamps[i], + timestamp); + break; + } + } + } + + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + mdb_dbi_close(env, dbi); + mdb_env_close(env); + + return 0; +} diff --git a/mtest8.c b/mtest8.c new file mode 100644 index 00000000..598e4c06 --- /dev/null +++ b/mtest8.c @@ -0,0 +1,158 @@ +/* mtest8.c - memory-mapped database tester/toy */ +/* + * Copyright 2015 Ilya Usvyatsky, Nexenta Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +/* Tests for DB attributes */ +#include +#include +#include +#include +#include +#include "mdbx.h" + +#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr) +#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0)) +#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \ + "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort())) + +char dkbuf[1024]; + +int main(int argc,char * argv[]) +{ + int i = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data, data1; + MDB_txn *txn; + MDB_stat mst; + int count; + int *values; + char sval[8000]; + uint64_t *timestamps, timestamp; + struct timeval tv; + int env_opt = MDB_NOMEMINIT | MDB_NOSYNC | MDB_NOSUBDIR | MDB_NORDAHEAD; + + srand(time(NULL)); + + memset(sval, 0, sizeof(sval)); + count = 2000; //(rand()%384) + 64; + if (argc > 1) + count = atoi(argv[1]); + values = (int *)malloc(count*sizeof(int)); + timestamps = (uint64_t *)calloc(count,sizeof(uint64_t)); + + key.mv_size = sizeof(int); + data.mv_size = sizeof(sval); + data.mv_data = sval; + + values[0] = 42; + values[1] = 17; + + for (i = 2; i < count; ++i) + values[i] = values[i - 1] + values[i - 2]; + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 104857600)); + E(mdb_env_set_maxdbs(env, 8)); + E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + + for (i = 0; i < count; ++i) { + (void)gettimeofday(&tv, NULL); + timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec; + + snprintf(sval, 4000, "Value %d\n", values[i]); + snprintf(sval + 4000, 4000, "Value %d\n", values[i]); + key.mv_data = values + i; + E(mdb_put_attr(txn, dbi, &key, &data, timestamps[i], + MDB_NODUPDATA)); + } + + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + mdb_dbi_close(env, dbi); + mdb_env_close(env); + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 8)); + 
E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + for (i = 0; i < count; ++i) { + key.mv_data = values + i; + E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(timestamps[i] != timestamp); + + E(mdb_get(txn, dbi, &key, &data1)); + E(data.mv_size != data1.mv_size); + E(memcmp(data.mv_data, data1.mv_data, data.mv_size)); + } + + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + mdb_dbi_close(env, dbi); + mdb_env_close(env); + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 104857600)); + E(mdb_env_set_maxdbs(env, 8)); + E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + + for (i = 0; i < count; ++i) { + (void)gettimeofday(&tv, NULL); + timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec; + + key.mv_data = values + i; + E(mdb_set_attr(txn, dbi, &key, NULL, timestamps[i])); + } + + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + mdb_dbi_close(env, dbi); + mdb_env_close(env); + + E(mdb_env_create(&env)); + E(mdb_env_set_mapsize(env, 10485760)); + E(mdb_env_set_maxdbs(env, 8)); + E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + + E(mdb_txn_begin(env, NULL, 0, &txn)); + E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + for (i = 0; i < count; ++i) { + key.mv_data = values + i; + E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(timestamps[i] != timestamp); + + E(mdb_get(txn, dbi, &key, &data1)); + E(data.mv_size != data1.mv_size); + E(memcmp(data.mv_data, data1.mv_data, data.mv_size)); + } + + E(mdb_txn_commit(txn)); + E(mdb_env_stat(env, &mst)); + + mdb_dbi_close(env, dbi); + mdb_env_close(env); + + return 0; +} From 46b891508781c67ddf278ca73d9649d0a356e60e Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Tue, 29 Mar 2016 04:50:29 +0300 Subject: [PATCH 2/2] mdbx: rework attributes.
Change-Id: Id9d436a54ac14ed82c593710b1d5939871c89d1a --- lmdb.h | 350 ++++++++++++++++++++++++++----------------------------- mdb.c | 314 +++++++++++++++++++++++++------------------------ mtest7.c | 31 ++--- mtest8.c | 46 +++----- 4 files changed, 353 insertions(+), 388 deletions(-) diff --git a/lmdb.h b/lmdb.h index bc562e2e..9020778c 100644 --- a/lmdb.h +++ b/lmdb.h @@ -352,8 +352,6 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NODUPDATA 0x20 /** For mdb_cursor_put: overwrite the current key/data pair */ #define MDB_CURRENT 0x40 -/** For mdb_cursor_put_attr: set attribute */ -#define MDB_SETATTR 0x80 /** For put: Just reserve space for data, don't copy it. Return a * pointer to the reserved space. */ @@ -407,7 +405,6 @@ typedef enum MDB_cursor_op { MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ - MDB_GET_ATTR, /**< Get attribute of specified node */ MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; @@ -1359,31 +1356,6 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx); */ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); - /** @brief Get items attribute from a database. - * - * This function retrieves key/data pairs attribute from the database. - * The attribute of the specified key-value pair is returned in - * uint64_t to which \b attrp refers. - * If the database supports duplicate keys (#MDB_DUPSORT) then both - * key and data parameters are required, otherwise data is ignored. - * - * @note Values returned from the database are valid only until a - * subsequent update operation, or the end of the transaction. 
- * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] key The key to search for in the database - * @param[in] data The data for #MDB_DUPSORT databases - * @param[out] attrp The pointer to the result - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - *
<ul> - *
<li>#MDB_NOTFOUND - the key-value pair was not in the database. - *
<li>EINVAL - an invalid parameter was specified. - * </ul>
- */ -int mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, - uint64_t *attrp); - /** @brief Store items into a database. * * This function stores key/data pairs in the database. The default behavior @@ -1434,77 +1406,6 @@ int mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags); - /** @brief Store items and attributes into a database. - * - * This function stores key/data pairs in the database. The default behavior - * is to enter the new key/data pair, replacing any previously existing key - * if duplicates are disallowed, or adding a duplicate data item if - * duplicates are allowed (#MDB_DUPSORT). - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] key The key to store in the database - * @param[in] attr The attribute to store in the database - * @param[in,out] data The data to store - * @param[in] flags Special options for this operation. This parameter - * must be set to 0 or by bitwise OR'ing together one or more of the - * values described here. - *
<ul> - *
<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not - * already appear in the database. This flag may only be specified - * if the database was opened with #MDB_DUPSORT. The function will - * return #MDB_KEYEXIST if the key/data pair already appears in the - * database. - *
<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key - * does not already appear in the database. The function will return - * #MDB_KEYEXIST if the key already appears in the database, even if - * the database supports duplicates (#MDB_DUPSORT). The \b data - * parameter will be set to point to the existing item. - *
<li>#MDB_RESERVE - reserve space for data of the given size, but - * don't copy the given data. Instead, return a pointer to the - * reserved space, which the caller can fill in later - before - * the next update operation or the transaction ends. This saves - * an extra memcpy if the data is being generated later. - * LMDB does nothing else with this memory, the caller is expected - * to modify all of the space requested. - *
<li>#MDB_APPEND - append the given key/data pair to the end of the - * database. This option allows fast bulk loading when keys are - * already known to be in the correct order. Loading unsorted keys - * with this flag will cause a #MDB_KEYEXIST error. - *
<li>#MDB_APPENDDUP - as above, but for sorted dup data. - * </ul>
- * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - *
<ul> - *
<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). - *
<li>#MDB_TXN_FULL - the transaction has too many dirty pages. - *
<li>EACCES - an attempt was made to write in a read-only transaction. - *
<li>EINVAL - an invalid parameter was specified. - * </ul>
- */ -int mdb_put_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, - uint64_t attr, unsigned int flags); - - /** @brief Set items attribute from a database. - * - * This function stores key/data pairs attribute to the database. - * If the database supports duplicate keys (#MDB_DUPSORT) then both - * key and data parameters are required, otherwise data is ignored. - * - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] key The key to search for in the database - * @param[in] data The data for #MDB_DUPSORT databases - * @param[in] attr The attribute to be stored - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - *
<ul> - *
<li>#MDB_NOTFOUND - the key-value pair was not in the database. - *
<li>EINVAL - an invalid parameter was specified. - * </ul>
- */ -int mdb_set_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, - uint64_t attrp); - /** @brief Delete items from a database. * * This function removes key/data pairs from the database. @@ -1613,30 +1514,6 @@ MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor); int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op); - /** @brief Get items attribute from a database cursor. - * - * This function retrieves key/data pairs attribute from the database. - * The attribute of the specified key-value pair is returned in - * uint64_t to which \b attrp refers. - * If the database supports duplicate keys (#MDB_DUPSORT) then both - * key and data parameters are required, otherwise data is ignored. - * - * @note Values returned from the database are valid only until a - * subsequent update operation, or the end of the transaction. - * @param[in] mc A database cursor pointing at the node - * @param[in] key The key to search for in the database - * @param[in] data The data for #MDB_DUPSORT databases - * @param[out] attrp The pointer to the result - * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - *
<ul> - *
<li>#MDB_NOTFOUND - the key-value pair was not in the database. - *
<li>EINVAL - an invalid parameter was specified. - * </ul>
- */ -int mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, - uint64_t *attrp); - /** @brief Store by cursor. * * This function stores key/data pairs into the database. @@ -1699,69 +1576,6 @@ int mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, unsigned flags); - /** @brief Store by cursor with attribute. - * - * This function stores key/data pairs into the database. - * The cursor is positioned at the new item, or on failure usually near it. - * @note Earlier documentation incorrectly said errors would leave the - * state of the cursor unchanged. - * @param[in] cursor A cursor handle returned by #mdb_cursor_open() - * @param[in] key The key operated on. - * @param[in] data The data operated on. - * @param[in] attr The attribute. - * @param[in] flags Options for this operation. This parameter - * must be set to 0 or one of the values described here. - *
<ul> - *
<li>#MDB_CURRENT - replace the item at the current cursor position. - * The \b key parameter must still be provided, and must match it. - * If using sorted duplicates (#MDB_DUPSORT) the data item must still - * sort into the same place. This is intended to be used when the - * new data is the same size as the old. Otherwise it will simply - * perform a delete of the old record followed by an insert. - *
<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not - * already appear in the database. This flag may only be specified - * if the database was opened with #MDB_DUPSORT. The function will - * return #MDB_KEYEXIST if the key/data pair already appears in the - * database. - *
<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key - * does not already appear in the database. The function will return - * #MDB_KEYEXIST if the key already appears in the database, even if - * the database supports duplicates (#MDB_DUPSORT). - *
<li>#MDB_RESERVE - reserve space for data of the given size, but - * don't copy the given data. Instead, return a pointer to the - * reserved space, which the caller can fill in later. This saves - * an extra memcpy if the data is being generated later. - *
<li>#MDB_APPEND - append the given key/data pair to the end of the - * database. No key comparisons are performed. This option allows - * fast bulk loading when keys are already known to be in the - * correct order. Loading unsorted keys with this flag will cause - * data corruption. - *
<li>#MDB_APPENDDUP - as above, but for sorted dup data. - *
<li>#MDB_MULTIPLE - store multiple contiguous data elements in a - * single request. This flag may only be specified if the database - * was opened with #MDB_DUPFIXED. The \b data argument must be an - * array of two MDB_vals. The mv_size of the first MDB_val must be - * the size of a single data element. The mv_data of the first MDB_val - * must point to the beginning of the array of contiguous data elements. - * The mv_size of the second MDB_val must be the count of the number - * of data elements to store. On return this field will be set to - * the count of the number of elements actually written. The mv_data - * of the second MDB_val is unused. - *
<li>#MDB_SETATTR - set the attribute of the key/data pair to - * specified value. - * </ul>
- * @return A non-zero error value on failure and 0 on success. Some possible - * errors are: - *
<ul> - *
<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). - *
<li>#MDB_TXN_FULL - the transaction has too many dirty pages. - *
<li>EACCES - an attempt was made to write in a read-only transaction. - *
<li>EINVAL - an invalid parameter was specified. - * </ul>
- */ -int mdb_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data, - uint64_t attr, unsigned int flags); - /** @brief Delete current key/data pair * * This function deletes the key/data pair to which the cursor refers. @@ -1916,6 +1730,170 @@ int mdbx_env_pgwalk(MDB_txn *txn, MDB_pgvisitor_func* visitor, void* ctx); char* mdb_dkey(MDB_val *key, char *buf); +/* attribute support functions for Nexenta ***********************************/ +#if MDBX_MODE_ENABLED + +typedef uint64_t mdbx_attr_t; + + /** @brief Store by cursor with attribute. + * + * This function stores key/data pairs into the database. + * The cursor is positioned at the new item, or on failure usually near it. + * @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT. + * @note Earlier documentation incorrectly said errors would leave the + * state of the cursor unchanged. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] key The key operated on. + * @param[in] data The data operated on. + * @param[in] attr The attribute. + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + *
<ul> + *
<li>#MDB_CURRENT - replace the item at the current cursor position. + * The \b key parameter must still be provided, and must match it. + * This is intended to be used when the + * new data is the same size as the old. Otherwise it will simply + * perform a delete of the old record followed by an insert. + *
<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key + * does not already appear in the database. The function will return + * #MDB_KEYEXIST if the key already appears in the database. + *
<li>#MDB_RESERVE - reserve space for data of the given size, but + * don't copy the given data. Instead, return a pointer to the + * reserved space, which the caller can fill in later. This saves + * an extra memcpy if the data is being generated later. + *
<li>#MDB_APPEND - append the given key/data pair to the end of the + * database. No key comparisons are performed. This option allows + * fast bulk loading when keys are already known to be in the + * correct order. Loading unsorted keys with this flag will cause + * data corruption. + * </ul>
+ * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
<ul> + *
<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). + *
<li>#MDB_TXN_FULL - the transaction has too many dirty pages. + *
<li>EACCES - an attempt was made to write in a read-only transaction. + *
<li>EINVAL - an invalid parameter was specified. + * </ul>
+ */ +int mdbx_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + mdbx_attr_t attr, unsigned flags); + + /** @brief Store items and attributes into a database. + * + * This function stores key/data pairs in the database. The default behavior + * is to enter the new key/data pair, replacing any previously existing key + * if duplicates are disallowed. + * @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to store in the database + * @param[in] attr The attribute to store in the database + * @param[in,out] data The data to store + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + *
<ul> + *
<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key + * does not already appear in the database. The function will return + * #MDB_KEYEXIST if the key already appears in the database. The \b data + * parameter will be set to point to the existing item. + *
<li>#MDB_RESERVE - reserve space for data of the given size, but + * don't copy the given data. Instead, return a pointer to the + * reserved space, which the caller can fill in later - before + * the next update operation or the transaction ends. This saves + * an extra memcpy if the data is being generated later. + * LMDB does nothing else with this memory, the caller is expected + * to modify all of the space requested. + *
<li>#MDB_APPEND - append the given key/data pair to the end of the + * database. This option allows fast bulk loading when keys are + * already known to be in the correct order. Loading unsorted keys + * with this flag will cause a #MDB_KEYEXIST error. + * </ul>
+ * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
<ul> + *
<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize(). + *
<li>#MDB_TXN_FULL - the transaction has too many dirty pages. + *
<li>EACCES - an attempt was made to write in a read-only transaction. + *
<li>EINVAL - an invalid parameter was specified. + * </ul>
+ */ +int mdbx_put_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + mdbx_attr_t attr, unsigned flags); + + /** @brief Set items attribute from a database. + * + * This function stores key/data pairs attribute to the database. + * @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[in] data The data to be stored or NULL to save previous value. + * @param[in] attr The attribute to be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
<ul> + *
<li>#MDB_NOTFOUND - the key-value pair was not in the database. + *
<li>EINVAL - an invalid parameter was specified. + * </ul>
+ */ +int mdbx_set_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + mdbx_attr_t attr); + + /** @brief Get items attribute from a database cursor. + * + * This function retrieves key/data pairs attribute from the database. + * The attribute of the specified key-value pair is returned in + * uint64_t to which \b attrptr refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data could be NULL. + * + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] mc A database cursor pointing at the node + * @param[in] key The key to search for in the database + * @param[in,out] data The data for #MDB_DUPSORT databases + * @param[out] attrptr The pointer to the result + * @param[in] op A cursor operation #MDB_cursor_op + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
<ul> + *
<li>#MDB_NOTFOUND - the key-value pair was not in the database. + *
<li>EINVAL - an invalid parameter was specified. + * </ul>
+ */ +int mdbx_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, + mdbx_attr_t *attrptr, MDB_cursor_op op); + + /** @brief Get items attribute from a database. + * + * This function retrieves key/data pairs attribute from the database. + * The attribute of the specified key-value pair is returned in + * uint64_t to which \b attrptr refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then both + * key and data parameters are required, otherwise data is ignored. + * + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[in] data The data for #MDB_DUPSORT databases + * @param[out] attrptr The pointer to the result + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
<ul> + *
<li>#MDB_NOTFOUND - the key-value pair was not in the database. + *
<li>EINVAL - an invalid parameter was specified. + * </ul>
+ */ +int mdbx_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + mdbx_attr_t *attrptr); + +#endif /* MDBX_MODE_ENABLED */ + #ifdef __cplusplus } #endif diff --git a/mdb.c b/mdb.c index 158b71f1..bf7f7d9f 100644 --- a/mdb.c +++ b/mdb.c @@ -591,11 +591,6 @@ typedef struct MDB_node { /** @} */ unsigned short mn_flags; /**< @ref mdb_node */ unsigned short mn_ksize; /**< key size */ -#if BYTE_ORDER == LITTLE_ENDIAN - unsigned int mn_attr_lo, mn_attr_hi; /**< node attribute */ -#else - unsigned int mn_attr_hi, mn_attr_lo; -#endif char mn_data[1]; /**< key and data are appended here */ } MDB_node; @@ -640,13 +635,6 @@ typedef struct MDB_node { (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) /** The size of a key in a node */ #define NODEKSZ(node) ((node)->mn_ksize) - /** The attribute of the node as uint64_t */ -#define NODEATTR(node) \ - ((uint64_t)(node)->mn_attr_lo | ((uint64_t)(node)->mn_attr_hi << 32)) - /** Set node attribute */ -#define SETATTR(node,attr) do { \ - (node)->mn_attr_lo = (attr) & 0xffffffffUL; \ - (node)->mn_attr_hi = (attr) >> 32; } while (0) /** Copy a page number from src to dst */ #ifdef MISALIGNED_OK @@ -1059,7 +1047,7 @@ static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); #define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, - pgno_t newpgno, uint64_t newattr, unsigned nflags); + pgno_t newpgno, unsigned nflags); static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static int mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending); @@ -1067,8 +1055,7 @@ static void mdb_env_close0(MDB_env *env); static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, - uint64_t attr, unsigned flags); + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags); static void 
mdb_node_del(MDB_cursor *mc, int ksize); static void mdb_node_shrink(MDB_page *mp, indx_t indx); static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft); @@ -1088,7 +1075,7 @@ static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, - int *exactp, uint64_t *attrp); + int *exactp); static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); @@ -1100,7 +1087,6 @@ static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force); static int mdb_drop0(MDB_cursor *mc, int subs); static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead); -static int mdb_cursor_touch(MDB_cursor *mc); /** @cond */ static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int_ai, mdb_cmp_int_a2, mdb_cmp_int_ua; @@ -5779,36 +5765,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, return MDB_BAD_TXN; mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_set(&mc, key, data, MDB_SET, &exact, NULL); -} - -int -mdb_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t *attrp) -{ - int exact = 0; - return mdb_cursor_set(mc, key, data, MDB_GET_ATTR, &exact, attrp); -} - -int -mdb_get_attr(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data, uint64_t *attrp) -{ - MDB_cursor mc; - MDB_xcursor mx; - - if (!key || !attrp || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) - return EINVAL; - /** TODO: implement support for DUPSORT? 
*/ - if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - return ENOTSUP; - if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) && !data) - return EINVAL; - - if (txn->mt_flags & MDB_TXN_ERROR) - return MDB_BAD_TXN; - - mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_get_attr(&mc, key, data, attrp); + return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); } /** Find a sibling for a page. @@ -6033,7 +5990,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) /** Set the cursor on a specific data item. */ static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, - MDB_cursor_op op, int *exactp, uint64_t *attrp) + MDB_cursor_op op, int *exactp) { int rc; MDB_page *mp; @@ -6198,7 +6155,7 @@ set1: } else { ex2p = NULL; } - rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p, attrp); + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); if (unlikely(rc != MDB_SUCCESS)) return rc; } @@ -6222,9 +6179,6 @@ set1: } } - if (op == MDB_GET_ATTR) - *attrp = NODEATTR(leaf); - /* The key already matches in all other cases */ if (op == MDB_SET_RANGE || op == MDB_SET_KEY) MDB_GET_KEY(leaf, key); @@ -6387,7 +6341,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = EINVAL; } else { rc = mdb_cursor_set(mc, key, data, op, - op == MDB_SET_RANGE ? NULL : &exact, NULL); + op == MDB_SET_RANGE ? 
NULL : &exact); } break; case MDB_GET_MULTIPLE: @@ -6555,7 +6509,7 @@ mdb_cursor_touch(MDB_cursor *mc) #define MDB_NOSPILL 0x8000 int -mdb_cursor_put_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t attr, +mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned flags) { MDB_env *env; @@ -6657,20 +6611,11 @@ mdb_cursor_put_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data, uint64_t attr, } } } else { - rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact, NULL); + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); } if ((flags & MDB_NOOVERWRITE) && rc == 0) { mdb_debug("duplicate key [%s]", DKEY(key)); *data = d2; - if (F_ISSET(flags, MDB_SETATTR)) { - /* make sure all cursor pages are writable */ - rc2 = mdb_cursor_touch(mc); - if (rc2) - return rc2; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], - mc->mc_ki[mc->mc_top]); - SETATTR(leaf, attr); - } return MDB_KEYEXIST; } if (rc && unlikely(rc != MDB_NOTFOUND)) @@ -6947,8 +6892,6 @@ current: omp = np; } SETDSZ(leaf, data->mv_size); - if (F_ISSET(flags, MDB_SETATTR)) - SETATTR(leaf, attr); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = PAGEDATA(omp); else @@ -6963,8 +6906,6 @@ current: * also reuse this node if the new data is smaller, * but instead we opt to shrink the node in that case. */ - if (F_ISSET(flags, MDB_SETATTR)) - SETATTR(leaf, attr); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = olddata.mv_data; else if (!(mc->mc_flags & C_SUB)) @@ -6988,10 +6929,10 @@ new_sub: nflags &= ~MDB_APPEND; /* sub-page may need room to grow */ if (!insert_key) nflags |= MDB_SPLIT_REPLACE; - rc = mdb_page_split(mc, key, rdata, P_INVALID, attr, nflags); + rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); } else { /* There is room already in this leaf page. 
*/ - rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, attr, nflags); + rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ MDB_cursor *m2, *m3; @@ -7113,41 +7054,6 @@ bad_sub: return rc; } -int -mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, - unsigned int flags) -{ - flags &= ~MDB_SETATTR; - return mdb_cursor_put_attr(mc, key, data, 0, flags); -} - -int -mdb_set_attr(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data, uint64_t attr) -{ - MDB_cursor mc; - MDB_xcursor mx; - MDB_val dummy, *rdata = data ? data : &dummy; - int rc, exact = 1; - - if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) - return EINVAL; - /** TODO: implement support for DUPSORT? */ - if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - return ENOTSUP; - if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) && !data) - return EINVAL; - - if (txn->mt_flags & MDB_TXN_ERROR) - return MDB_BAD_TXN; - - mdb_cursor_init(&mc, txn, dbi, &mx); - if ((rc = mdb_cursor_set(&mc, key, rdata, MDB_SET, &exact, NULL)) != MDB_SUCCESS) - return rc; - return mdb_cursor_put_attr(&mc, key, rdata, attr, - MDB_CURRENT|MDB_SETATTR); -} - int mdb_cursor_del(MDB_cursor *mc, unsigned flags) { @@ -7363,7 +7269,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key) */ static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, uint64_t attr, unsigned flags) + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned flags) { unsigned i; size_t node_size = NODESIZE; @@ -7445,10 +7351,9 @@ update: node = NODEPTR(mp, indx); node->mn_ksize = (key == NULL) ? 
0 : key->mv_size; node->mn_flags = flags; - if (IS_LEAF(mp)) { + if (IS_LEAF(mp)) SETDSZ(node,data->mv_size); - SETATTR(node,attr); - } else + else SETPGNO(node,pgno); if (key) @@ -7906,7 +7811,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key) mdb_debug("Not enough room, delta = %d, splitting...", delta); pgno = NODEPGNO(node); mdb_node_del(mc, 0); - return mdb_page_split(mc, key, NULL, pgno, 0, MDB_SPLIT_REPLACE); + return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); } numkeys = NUMKEYS(mp); @@ -7959,7 +7864,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) { MDB_node *srcnode; MDB_val key, data; - uint64_t attr = 0UL; pgno_t srcpg; MDB_cursor mn; int rc; @@ -8007,7 +7911,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); - attr = NODEATTR(srcnode); } mn.mc_xcursor = NULL; if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { @@ -8044,8 +7947,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) /* Add the node to the destination page. 
*/ - rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, - attr, flags); + rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); if (unlikely(rc != MDB_SUCCESS)) return rc; @@ -8224,7 +8126,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) key.mv_size = csrc->mc_db->md_xsize; key.mv_data = PAGEDATA(psrc); for (i = 0; i < NUMKEYS(psrc); i++, j++) { - rc = mdb_node_add(cdst, j, &key, NULL, 0, 0, 0); + rc = mdb_node_add(cdst, j, &key, NULL, 0, 0); if (unlikely(rc != MDB_SUCCESS)) return rc; key.mv_data = (char *)key.mv_data + key.mv_size; @@ -8256,8 +8158,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) data.mv_size = NODEDSZ(srcnode); data.mv_data = NODEDATA(srcnode); - rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), - NODEATTR(srcnode), srcnode->mn_flags); + rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags); if (unlikely(rc != MDB_SUCCESS)) return rc; } @@ -8672,7 +8573,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, xdata = NULL; flags |= MDB_NODUPDATA; } - rc = mdb_cursor_set(&mc, key, xdata, op, &exact, NULL); + rc = mdb_cursor_set(&mc, key, xdata, op, &exact); if (likely(rc == 0)) { /* let mdb_page_split know about this cursor if needed: * delete will trigger a rebalance; if it needs to move @@ -8698,13 +8599,12 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, * @param[in] newkey The key for the newly inserted node. * @param[in] newdata The data for the newly inserted node. * @param[in] newpgno The page number, if the new node is a branch node. - * @param[in] newattr The node attr for the newly inserted node. * @param[in] nflags The #NODE_ADD_FLAGS for the new node. * @return 0 on success, non-zero on failure. 
*/ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, - uint64_t newattr, unsigned nflags) + unsigned nflags) { unsigned flags; int rc = MDB_SUCCESS, new_root = 0, did_split = 0; @@ -8754,7 +8654,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno new_root = mc->mc_db->md_depth++; /* Add left (implicit) pointer. */ - if (unlikely((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0, 0)) != MDB_SUCCESS)) { + if (unlikely((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS)) { /* undo the pre-push */ mc->mc_pg[0] = mc->mc_pg[1]; mc->mc_ki[0] = mc->mc_ki[1]; @@ -8918,8 +8818,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno mn.mc_top--; did_split = 1; /* We want other splits to find mn when doing fixups */ - WITH_CURSOR_TRACKING(mn, - rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0, 0)); + WITH_CURSOR_TRACKING(mn, rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0)); if (unlikely(rc != MDB_SUCCESS)) goto done; @@ -8947,7 +8846,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno } } else { mn.mc_top--; - rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0, 0); + rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0); mn.mc_top++; } if (unlikely(rc != MDB_SUCCESS)) @@ -8955,14 +8854,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if (nflags & MDB_APPEND) { mc->mc_pg[mc->mc_top] = rp; mc->mc_ki[mc->mc_top] = 0; - rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, newattr, nflags); + rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags); if (rc) goto done; for (i=0; i<mc->mc_top; i++) mc->mc_ki[i] = mn.mc_ki[i]; } else if (!IS_LEAF2(mp)) { /* Move nodes */ - uint64_t rattr; mc->mc_pg[mc->mc_top] = rp; i = split_indx; j = 0; @@ -8970,7 +8868,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno if (i
== newindx) { rkey.mv_data = newkey->mv_data; rkey.mv_size = newkey->mv_size; - rattr = newattr; if (IS_LEAF(mp)) { rdata = newdata; } else @@ -8982,7 +8879,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE); rkey.mv_data = NODEKEY(node); rkey.mv_size = node->mn_ksize; - rattr = NODEATTR(node); if (IS_LEAF(mp)) { xdata.mv_data = NODEDATA(node); xdata.mv_size = NODEDSZ(node); @@ -8997,7 +8893,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno rkey.mv_size = 0; } - rc = mdb_node_add(mc, j, &rkey, rdata, pgno, rattr, flags); + rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags); if (rc) goto done; if (i == nkeys) { @@ -9153,27 +9049,6 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi, return rc; } -int -mdb_put_attr(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data, uint64_t attr, unsigned int flags) -{ - MDB_cursor mc; - MDB_xcursor mx; - - if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID)) - return EINVAL; - - /** TODO: implement support for DUPSORT? 
*/ - if (txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - return ENOTSUP; - - if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags) - return EINVAL; - - mdb_cursor_init(&mc, txn, dbi, &mx); - return mdb_cursor_put_attr(&mc, key, data, attr, flags | MDB_SETATTR); -} - #ifndef MDB_WBUF #define MDB_WBUF (1024*1024) #endif @@ -9974,7 +9849,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned flags, MDB_dbi *dbi) key.mv_size = len; key.mv_data = (void *)name; mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); - rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact, NULL); + rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact); if (likely(rc == MDB_SUCCESS)) { /* make sure this is actually a DB */ MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
@@ -10709,6 +10584,178 @@ mdbx_env_pgwalk(MDB_txn *txn, MDB_pgvisitor_func* visitor, void* user)
 	return rc;
 }
 
+/* attribute support functions for Nexenta ***********************************/
+
+/* Split the attribute prefix off a stored item.
+ *
+ * On entry `data` describes the raw item: an mdbx_attr_t followed by the
+ * payload. On success the attribute is copied to `*attrptr` (unless it is
+ * NULL) and `data` is narrowed to the payload, with mv_data set to NULL
+ * when the payload is empty. Returns MDB_INCOMPATIBLE when the item is
+ * too short to contain an attribute.
+ */
+static __inline int
+mdbx_attr_peek(MDB_val *data, mdbx_attr_t *attrptr)
+{
+	if (unlikely(data->mv_size < sizeof(mdbx_attr_t)))
+		return MDB_INCOMPATIBLE;
+
+	/* copy bytewise so no alignment of the stored item is assumed */
+	if (likely(attrptr != NULL))
+		memcpy(attrptr, data->mv_data, sizeof(mdbx_attr_t));
+	data->mv_size -= sizeof(mdbx_attr_t);
+	data->mv_data = likely(data->mv_size > 0)
+		? ((mdbx_attr_t*) data->mv_data) + 1 : NULL;
+
+	return MDB_SUCCESS;
+}
+
+/* Fill an item obtained from a put done with MDB_RESERVE.
+ *
+ * `reserved` has room for an mdbx_attr_t followed by the payload. The
+ * attribute is always stored. If the caller's `flags` lack MDB_RESERVE
+ * the payload is copied from `data`; otherwise `data->mv_data` is pointed
+ * at the payload area (NULL when `data->mv_size` is zero) for the caller
+ * to fill in. `data` may be NULL, meaning an empty payload.
+ */
+static __inline int
+mdbx_attr_poke(MDB_val *reserved, MDB_val *data, mdbx_attr_t attr, unsigned flags)
+{
+	mdbx_attr_t *space = reserved->mv_data;
+
+	/* stored bytewise, and also under MDB_RESERVE: the caller only gets a
+	 * pointer to the payload and could not set the attribute afterwards */
+	memcpy(space, &attr, sizeof(attr));
+	if (flags & MDB_RESERVE) {
+		if (likely(data != NULL)) {
+			data->mv_data = data->mv_size ? space + 1 : NULL;
+		}
+	} else if (likely(data != NULL)) {
+		/* memmove: mdbx_set_attr() can hand the item's old payload back
+		 * as `data`, so source and destination may overlap */
+		memmove(space + 1, data->mv_data, data->mv_size);
+	}
+
+	return MDB_SUCCESS;
+}
+
+/* mdbx_cursor_get() followed by extraction of the attribute: on success
+ * `*attrptr` (if not NULL) gets the attribute and `data` the payload. */
+int
+mdbx_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data,
+	mdbx_attr_t *attrptr, MDB_cursor_op op)
+{
+	int rc = mdbx_cursor_get(mc, key, data, op);
+	if (unlikely(rc != MDB_SUCCESS))
+		return rc;
+
+	return mdbx_attr_peek(data, attrptr);
+}
+
+/* mdbx_get() followed by extraction of the attribute: on success
+ * `*attrptr` (if not NULL) gets the attribute and `data` the payload. */
+int
+mdbx_get_attr(MDB_txn *txn, MDB_dbi dbi,
+	MDB_val *key, MDB_val *data, uint64_t *attrptr)
+{
+	int rc = mdbx_get(txn, dbi, key, data);
+	if (unlikely(rc != MDB_SUCCESS))
+		return rc;
+
+	return mdbx_attr_peek(data, attrptr);
+}
+
+/* Store `key` with `attr` and the payload `data` (NULL for none): reserves
+ * sizeof(mdbx_attr_t) extra bytes via mdbx_put() and fills them in. */
+int
+mdbx_put_attr(MDB_txn *txn, MDB_dbi dbi,
+	MDB_val *key, MDB_val *data, mdbx_attr_t attr, unsigned flags)
+{
+	MDB_val reserve = {
+		.mv_data = NULL,
+		.mv_size = (data ? data->mv_size : 0) + sizeof(mdbx_attr_t)
+	};
+
+	int rc = mdbx_put(txn, dbi, key, &reserve, flags | MDB_RESERVE);
+	if (unlikely(rc != MDB_SUCCESS))
+		return rc;
+
+	return mdbx_attr_poke(&reserve, data, attr, flags);
+}
+
+/* Cursor flavour of mdbx_put_attr(): same layout, via mdbx_cursor_put(). */
+int mdbx_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
+	mdbx_attr_t attr, unsigned flags)
+{
+	MDB_val reserve = {
+		.mv_data = NULL,
+		.mv_size = (data ? data->mv_size : 0) + sizeof(mdbx_attr_t)
+	};
+
+	int rc = mdbx_cursor_put(cursor, key, &reserve, flags | MDB_RESERVE);
+	if (unlikely(rc != MDB_SUCCESS))
+		return rc;
+
+	return mdbx_attr_poke(&reserve, data, attr, flags);
+}
+
+/* Set the attribute of `key`, optionally replacing its payload.
+ *
+ * A missing key is created when `data` is given, otherwise MDB_NOTFOUND
+ * is returned. For an existing key a NULL `data` keeps the current
+ * payload. Nothing is written when attribute and payload are unchanged.
+ */
+int mdbx_set_attr(MDB_txn *txn, MDB_dbi dbi,
+	MDB_val *key, MDB_val *data, mdbx_attr_t attr)
+{
+	MDB_cursor mc;
+	MDB_xcursor mx;
+	MDB_val old_data;
+	mdbx_attr_t old_attr;
+	int rc;
+
+	if (unlikely(!key || !txn))
+		return EINVAL;
+
+	if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
+		return MDB_VERSION_MISMATCH;
+
+	if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+		return EINVAL;
+
+	if (unlikely(txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED)))
+		return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
+
+	mdb_cursor_init(&mc, txn, dbi, &mx);
+	rc = mdb_cursor_set(&mc, key, &old_data, MDB_SET, NULL);
+	if (unlikely(rc != MDB_SUCCESS)) {
+		if (rc == MDB_NOTFOUND && data) {
+			/* link the on-stack cursor into the txn for the write */
+			mc.mc_next = txn->mt_cursors[dbi];
+			txn->mt_cursors[dbi] = &mc;
+			rc = mdbx_cursor_put_attr(&mc, key, data, attr, 0);
+			txn->mt_cursors[dbi] = mc.mc_next;
+		}
+		return rc;
+	}
+
+	rc = mdbx_attr_peek(&old_data, &old_attr);
+	if (unlikely(rc != MDB_SUCCESS))
+		return rc;
+
+	/* compare, never copy: the caller's buffer must stay untouched */
+	if (old_attr == attr && (!data ||
+			(data->mv_size == old_data.mv_size
+			&& memcmp(data->mv_data, old_data.mv_data, old_data.mv_size) == 0)))
+		return MDB_SUCCESS;
+
+	mc.mc_next = txn->mt_cursors[dbi];
+	txn->mt_cursors[dbi] = &mc;
+	rc = mdbx_cursor_put_attr(&mc, key, data ? data : &old_data, attr, MDB_CURRENT);
+	txn->mt_cursors[dbi] = mc.mc_next;
+	return rc;
+}
+
 #endif /* MDBX_MODE_ENABLED */
 
 /** @} */
diff --git a/mtest7.c b/mtest7.c index 34e925e1..0e15bc9e 100644 --- a/mtest7.c +++ b/mtest7.c @@ -27,6 +27,10 @@ char dkbuf[1024]; +#ifndef DBPATH +# define DBPATH "./testdb/data.mdb" +#endif + int main(int argc,char * argv[]) { int i = 0, j = 0, rc; @@ -51,10 +55,11 @@ int main(int argc,char * argv[]) values = (int *)malloc(count*sizeof(int)); timestamps = (uint64_t *)calloc(count,sizeof(uint64_t)); + unlink(DBPATH); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 104857600)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest7.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi)); @@ -70,33 +75,17 @@ int main(int argc,char * argv[]) values[i] = rand()%16383 ^ (timestamps[i] & 0xffff); key.mv_data = values + i; sprintf(sval, "%03x %d foo bar", values[i], values[i]); - rc = mdb_set_attr(txn, dbi, &key, &data, timestamps[i]); - if (rc && rc != MDB_NOTFOUND) { - printf("mdb_set_attr returned %d\n", rc); - break; - } - if (i % 2) { - if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, 
&key, &data, - MDB_NODUPDATA))) { - j++; - continue; - } - E(mdb_set_attr(txn, dbi, &key, &data, timestamps[i])); - } else - E(mdb_put_attr(txn, dbi, &key, &data, timestamps[i], - MDB_NODUPDATA)); + E(mdbx_put_attr(txn, dbi, &key, &data, timestamps[i], MDB_NODUPDATA)); } if (j) printf("%d duplicates skipped\n", j); E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 10485760)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest7.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi)); @@ -105,7 +94,7 @@ int main(int argc,char * argv[]) continue; key.mv_data = values + i; sprintf(sval, "%03x %d foo bar", values[i], values[i]); - E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp)); if (timestamps[i] != timestamp) { for (j = 0; j < count; ++j) { if (j != i && values[i] == values[j] && @@ -129,8 +118,6 @@ int main(int argc,char * argv[]) E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); return 0; diff --git a/mtest8.c b/mtest8.c index 598e4c06..f5895628 100644 --- a/mtest8.c +++ b/mtest8.c @@ -27,12 +27,16 @@ char dkbuf[1024]; +#ifndef DBPATH +# define DBPATH "./testdb/data.mdb" +#endif + int main(int argc,char * argv[]) { int i = 0, rc; MDB_env *env; MDB_dbi dbi; - MDB_val key, data, data1; + MDB_val key, data; MDB_txn *txn; MDB_stat mst; int count; @@ -61,10 +65,11 @@ int main(int argc,char * argv[]) for (i = 2; i < count; ++i) values[i] = values[i - 1] + values[i - 2]; + unlink(DBPATH); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 104857600)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_dbi_open(txn, "id8", 
MDB_CREATE|MDB_INTEGERKEY, &dbi)); @@ -76,82 +81,65 @@ int main(int argc,char * argv[]) snprintf(sval, 4000, "Value %d\n", values[i]); snprintf(sval + 4000, 4000, "Value %d\n", values[i]); key.mv_data = values + i; - E(mdb_put_attr(txn, dbi, &key, &data, timestamps[i], - MDB_NODUPDATA)); + E(mdbx_put_attr(txn, dbi, &key, &data, timestamps[i], MDB_NOOVERWRITE)); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 10485760)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); - E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi)); for (i = 0; i < count; ++i) { key.mv_data = values + i; - E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp)); E(timestamps[i] != timestamp); - - E(mdb_get(txn, dbi, &key, &data1)); - E(data.mv_size != data1.mv_size); - E(memcmp(data.mv_data, data1.mv_data, data.mv_size)); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 104857600)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); - E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi)); for (i = 0; i < count; ++i) { (void)gettimeofday(&tv, NULL); timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec; key.mv_data = values + i; - E(mdb_set_attr(txn, dbi, &key, NULL, timestamps[i])); + E(mdbx_set_attr(txn, dbi, &key, NULL, timestamps[i])); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); E(mdb_env_create(&env)); E(mdb_env_set_mapsize(env, 
10485760)); E(mdb_env_set_maxdbs(env, 8)); - E(mdb_env_open(env, "./mtest8.db", env_opt, 0664)); + E(mdb_env_open(env, DBPATH, env_opt, 0664)); E(mdb_txn_begin(env, NULL, 0, &txn)); - E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi)); + E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi)); for (i = 0; i < count; ++i) { key.mv_data = values + i; - E(mdb_get_attr(txn, dbi, &key, &data, &timestamp)); + E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp)); E(timestamps[i] != timestamp); - - E(mdb_get(txn, dbi, &key, &data1)); - E(data.mv_size != data1.mv_size); - E(memcmp(data.mv_data, data1.mv_data, data.mv_size)); } E(mdb_txn_commit(txn)); E(mdb_env_stat(env, &mst)); - - mdb_dbi_close(env, dbi); mdb_env_close(env); return 0;