diff --git a/mdbx.h b/mdbx.h index a9c085fe..6461ab6d 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1917,6 +1917,29 @@ enum MDBX_option_t { * bottom. Default is 8, i.e. at least the 1/8 of the current dirty pages * should be spilled when reached the condition described above. */ MDBX_opt_spill_min_denominator, + + /** \brief Controls the in-process fraction of the parent transaction's dirty + * pages that will be spilled when starting each child transaction. + * + * \details The `MDBX_opt_spill_parent4child_denominator` defines the + * denominator that determines what part of the parent transaction's dirty + * pages will be spilled explicitly when starting each child transaction. + * Exactly `pages_to_spill = dirty_pages / N`, + * where `N` is the value set by `MDBX_opt_spill_parent4child_denominator`. + * + * For a stack of nested transactions each dirty page can be spilled only + * once, and a parent's dirty pages can't be spilled while child + * transaction(s) are running. Therefore a child transaction could reach + * \ref MDBX_TXN_FULL either when the parent(s) have spilled too little (and + * the child reaches the limit of dirty pages), or when the parent(s) have + * spilled too much (since a child can't spill already spilled pages). So + * there is no universal golden ratio. + * + * Should be in the range 0..255, where zero means no explicit spilling will + * be performed when starting nested transactions. + * Default is 0, i.e. by default no spilling is performed when starting + * nested transactions, which corresponds to the historical behaviour.
*/ + MDBX_opt_spill_parent4child_denominator, }; #ifndef __cplusplus /** \ingroup c_settings */ diff --git a/src/core.c b/src/core.c index 16cce2c8..964c20c1 100644 --- a/src/core.c +++ b/src/core.c @@ -6893,6 +6893,15 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; + + if (env->me_options.spill_parent4child_denominator) { + /* Spill dirty-pages of parent to provide dirtyroom for child txn */ + rc = mdbx_txn_spill(parent, nullptr, + parent->tw.dirtylist->length / + env->me_options.spill_parent4child_denominator); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } mdbx_tassert(parent, mdbx_audit_ex(parent, 0, false) == 0); flags |= parent->mt_flags & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS); @@ -9922,6 +9931,7 @@ __cold int mdbx_env_create(MDBX_env **penv) { env->me_options.dp_initial = env->me_options.dp_limit; env->me_options.spill_max_denominator = 8; env->me_options.spill_min_denominator = 8; + env->me_options.spill_parent4child_denominator = 0; int rc; const size_t os_psize = mdbx_syspagesize(); @@ -20462,6 +20472,11 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return MDBX_EINVAL; env->me_options.spill_min_denominator = (uint8_t)value; break; + case MDBX_opt_spill_parent4child_denominator: + if (unlikely(value > 255)) + return MDBX_EINVAL; + env->me_options.spill_parent4child_denominator = (uint8_t)value; + break; default: return MDBX_EINVAL; @@ -20522,6 +20537,9 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, case MDBX_opt_spill_min_denominator: *value = env->me_options.spill_min_denominator; break; + case MDBX_opt_spill_parent4child_denominator: + *value = env->me_options.spill_parent4child_denominator; + break; default: return MDBX_EINVAL; diff --git a/src/internals.h b/src/internals.h index 44d48e35..ecdab895 100644 --- a/src/internals.h +++ 
b/src/internals.h @@ -985,6 +985,7 @@ struct MDBX_env { unsigned dp_initial; uint8_t spill_max_denominator; uint8_t spill_min_denominator; + uint8_t spill_parent4child_denominator; } me_options; struct { #if MDBX_LOCKING > 0