mdbx: rework nested transactions and write-transaction state.

This resolves https://github.com/leo-yuriev/libmdbx/issues/62

Change-Id: Icbd762ca69ab1ccf8c3440c71931d2ac14599a1b
This commit is contained in:
Leonid Yuriev 2019-10-08 19:57:17 +03:00
parent 1c9c637701
commit 273af26244
2 changed files with 600 additions and 621 deletions

File diff suppressed because it is too large Load Diff

View File

@ -658,7 +658,7 @@ typedef pgno_t *MDBX_PNL;
#define MDBX_PNL_DISORDERED(first, last) ((first) <= (last))
#endif
/* List of txnid, only for MDBX_env.mt_lifo_reclaimed */
/* List of txnid, only for MDBX_txn.tw.lifo_reclaimed */
typedef txnid_t *MDBX_TXL;
/* An Dirty-Page list item is an pgno/pointer pair. */
@ -747,31 +747,42 @@ struct MDBX_txn {
* aborts, the ID may be re-used by the next writer. */
txnid_t mt_txnid;
MDBX_env *mt_env; /* the DB environment */
/* The list of reclaimed txns from freeDB */
MDBX_TXL mt_lifo_reclaimed;
/* The list of pages that became unused during this transaction. */
MDBX_PNL mt_retired_pages;
/* The list of loose pages that became unused and may be reused
* in this transaction, linked through NEXT_LOOSE_PAGE(page). */
MDBX_page *mt_loose_pages;
/* Number of loose pages (mt_loose_pages) */
unsigned mt_loose_count;
/* The sorted list of dirty pages we temporarily wrote to disk
* because the dirty list was full. page numbers in here are
* shifted left by 1, deleted slots have the LSB set. */
MDBX_PNL mt_spill_pages;
union {
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
MDBX_DPL mt_rw_dirtylist;
/* For read txns: This thread/txn's reader table slot, or NULL. */
MDBX_reader *mt_ro_reader;
};
/* Array of records for each DB known in the environment. */
MDBX_dbx *mt_dbxs;
/* Array of MDBX_db records for each known DB */
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
union {
struct {
/* For read txns: This thread/txn's reader table slot, or NULL. */
MDBX_reader *reader;
} to;
struct {
/* The list of reclaimed txns from GC */
MDBX_TXL lifo_reclaimed;
/* The list of pages that became unused during this transaction. */
MDBX_PNL retired_pages;
/* The list of loose pages that became unused and may be reused
* in this transaction, linked through NEXT_LOOSE_PAGE(page). */
MDBX_page *loose_pages;
/* Number of loose pages (tw.loose_pages) */
unsigned loose_count;
/* The sorted list of dirty pages we temporarily wrote to disk
* because the dirty list was full. page numbers in here are
* shifted left by 1, deleted slots have the LSB set. */
MDBX_PNL spill_pages;
/* dirtylist room: Array size - dirty pages visible to this txn.
* Includes ancestor txns' dirty pages not hidden by other txns'
* dirty/spilled pages. Thus commit(nested txn) has room to merge
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
unsigned dirtyroom;
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
MDBX_DPL dirtylist;
pgno_t *reclaimed_pglist; /* Reclaimed freeDB pages */
txnid_t last_reclaimed; /* ID of last used record */
} tw;
};
/* Transaction DB Flags */
#define DB_DIRTY MDBX_TBL_DIRTY /* DB was written in this txn */
@ -790,11 +801,6 @@ struct MDBX_txn {
* This number only ever increments until the txn finishes; we
* don't decrement it when individual DB handles are closed. */
MDBX_dbi mt_numdbs;
/* dirtylist room: Array size - dirty pages visible to this txn.
* Includes ancestor txns' dirty pages not hidden by other txns'
* dirty/spilled pages. Thus commit(nested txn) has room to merge
* dirtylist into mt_parent after freeing hidden mt_parent pages. */
unsigned mt_dirtyroom;
size_t mt_owner; /* thread ID that owns this transaction */
mdbx_canary mt_canary;
};
@ -843,7 +849,7 @@ struct MDBX_cursor {
#define C_DEL 0x08 /* last op was a cursor_del */
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
#define C_RECLAIMING 0x20 /* FreeDB lookup is prohibited */
#define C_GCFREEZE 0x40 /* me_reclaimed_pglist must not be updated */
#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */
unsigned mc_flags; /* see mdbx_cursor */
MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */
indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */
@ -884,13 +890,6 @@ typedef struct MDBX_cursor_couple {
(mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \
} while (0)
/* State of FreeDB old pages, stored in the MDBX_env */
typedef struct MDBX_pgstate {
pgno_t *mf_reclaimed_pglist; /* Reclaimed freeDB pages, or NULL before use */
txnid_t mf_last_reclaimed; /* ID of last used record, or 0 if
!mf_reclaimed_pglist */
} MDBX_pgstate;
/* The database environment. */
struct MDBX_env {
#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
@ -932,11 +931,8 @@ struct MDBX_env {
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
unsigned *me_dbiseqs; /* array of dbi sequence numbers */
volatile txnid_t *me_oldest; /* ID of oldest reader last time we looked */
MDBX_pgstate me_pgstate; /* state of old pages from freeDB */
#define me_last_reclaimed me_pgstate.mf_last_reclaimed
#define me_reclaimed_pglist me_pgstate.mf_reclaimed_pglist
MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */
/* PNL of pages that became unused in a write txn */
MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */
/* PNL of pages that became unused in a write txn */
MDBX_PNL me_retired_pages;
/* MDBX_DP of pages written during a write txn. Length MDBX_DPL_TXNFULL. */
MDBX_DPL me_dirtylist;
@ -991,12 +987,6 @@ struct MDBX_env {
#endif
};
/* Nested transaction */
typedef struct MDBX_ntxn {
MDBX_txn mnt_txn; /* the transaction */
MDBX_pgstate mnt_pgstate; /* parent transaction's saved freestate */
} MDBX_ntxn;
/*----------------------------------------------------------------------------*/
/* Debug and Logging stuff */
@ -1235,7 +1225,7 @@ MDBX_INTERNAL_FUNC void mdbx_rthc_thread_dtor(void *ptr);
/* The number of overflow pages needed to store the given size. */
#define OVPAGES(env, size) (bytes2pgno(env, PAGEHDRSZ - 1 + (size)) + 1)
/* Link in MDBX_txn.mt_loose_pages list.
/* Link in MDBX_txn.tw.loose_pages list.
* Kept outside the page header, which is needed when reusing the page. */
#define NEXT_LOOSE_PAGE(p) (*(MDBX_page **)((p) + 2))