mirror of
				https://github.com/isar/libmdbx.git
				synced 2025-10-31 15:38:57 +08:00 
			
		
		
		
	mdbx: refine mdbx_page_spill().
				
					
				
			More for https://github.com/erthink/libmdbx/issues/132.

			Change-Id: I40788c13b54453c17d5e5dae6c3d2f93226f8e00
This commit is contained in:
		
							
								
								
									
										73
									
								
								src/core.c
									
									
									
									
									
								
							
							
						
						
									
										73
									
								
								src/core.c
									
									
									
									
									
								
							| @@ -4317,51 +4317,58 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key, | |||||||
|                            const MDBX_val *data) { |                            const MDBX_val *data) { | ||||||
|   if (mc->mc_flags & C_SUB) |   if (mc->mc_flags & C_SUB) | ||||||
|     return MDBX_SUCCESS; |     return MDBX_SUCCESS; | ||||||
|  |  | ||||||
|   MDBX_txn *txn = mc->mc_txn; |   MDBX_txn *txn = mc->mc_txn; | ||||||
|   MDBX_dpl *const dl = txn->tw.dirtylist; |   if (txn->mt_flags & MDBX_WRITEMAP) | ||||||
|  |  | ||||||
|   /* Estimate how much space this op will take */ |  | ||||||
|   pgno_t i = mc->mc_db->md_depth; |  | ||||||
|   /* Named DBs also dirty the main DB */ |  | ||||||
|   if (mc->mc_dbi >= CORE_DBS) |  | ||||||
|     i += txn->mt_dbs[MAIN_DBI].md_depth; |  | ||||||
|   /* For puts, roughly factor in the key+data size */ |  | ||||||
|   if (key) |  | ||||||
|     i += bytes2pgno(txn->mt_env, node_size(key, data) + txn->mt_env->me_psize); |  | ||||||
|   i += i; /* double it for good measure */ |  | ||||||
|   pgno_t need = i; |  | ||||||
|  |  | ||||||
|   if (txn->tw.dirtyroom > i) |  | ||||||
|     return MDBX_SUCCESS; |     return MDBX_SUCCESS; | ||||||
|  |  | ||||||
|   /* Less aggressive spill - we originally spilled the entire dirty list, |   /* Estimate how much space this op will take: */ | ||||||
|    * with a few exceptions for cursor pages and DB root pages. But this |   /* 1) Max b-tree height, reasonable enough with including dups' sub-tree */ | ||||||
|    * turns out to be a lot of wasted effort because in a large txn many |   size_t need = CURSOR_STACK + 3; | ||||||
|    * of those pages will need to be used again. So now we spill only 1/8th |   /* 2) GC/FreeDB for any payload */ | ||||||
|    * of the dirty pages. Testing revealed this to be a good tradeoff, |   if (mc->mc_dbi > FREE_DBI) { | ||||||
|    * better than 1/2, 1/4, or 1/10. */ |     need += txn->mt_dbs[FREE_DBI].md_depth + 3; | ||||||
|   if (need < txn->mt_env->me_options.dp_limit / 8) |     /* 3) Named DBs also dirty the main DB */ | ||||||
|     need = txn->mt_env->me_options.dp_limit / 8; |     if (mc->mc_dbi > MAIN_DBI) | ||||||
|  |       need += txn->mt_dbs[MAIN_DBI].md_depth + 3; | ||||||
|  |   } | ||||||
|  |   /* 4) Roughly factor in the key+data size */ | ||||||
|  |   need += bytes2pgno(txn->mt_env, node_size(key, data)) + 1; | ||||||
|  |   /* 5) Double it for safety enough reserve */ | ||||||
|  |   need += need; | ||||||
|  |   if (likely(txn->tw.dirtyroom > need)) | ||||||
|  |     return MDBX_SUCCESS; | ||||||
|  |  | ||||||
|  |   const size_t spill_min = (txn->tw.dirtylist->length / /* TODO: options */ 8); | ||||||
|  |   const size_t spill_max = (txn->tw.dirtylist->length / /* TODO: options */ 2); | ||||||
|  |   size_t spill = need - txn->tw.dirtyroom; | ||||||
|  |   spill = (spill < spill_max) ? spill : spill_max; | ||||||
|  |   spill = (spill > spill_min) ? spill : spill_min; | ||||||
|  |  | ||||||
|  |   int rc; | ||||||
|   if (!txn->tw.spill_pages) { |   if (!txn->tw.spill_pages) { | ||||||
|     txn->tw.spill_pages = mdbx_pnl_alloc(need); |     txn->tw.spill_pages = mdbx_pnl_alloc(spill); | ||||||
|     if (unlikely(!txn->tw.spill_pages)) |     if (unlikely(!txn->tw.spill_pages)) { | ||||||
|       return MDBX_ENOMEM; |       rc = MDBX_ENOMEM; | ||||||
|  |       goto bailout; | ||||||
|  |     } | ||||||
|   } else { |   } else { | ||||||
|     /* purge deleted slots */ |     /* purge deleted slots */ | ||||||
|     mdbx_pnl_purge_odd(txn->tw.spill_pages, 1); |     mdbx_pnl_purge_odd(txn->tw.spill_pages, 1); | ||||||
|  |     mdbx_pnl_reserve(&txn->tw.spill_pages, spill); | ||||||
|   } |   } | ||||||
|  |   mdbx_notice("spilling %zu pages (have %u dirty-room, need %zu)", spill, | ||||||
|  |               txn->tw.dirtyroom, need); | ||||||
|  |  | ||||||
|   /* Preserve pages which may soon be dirtied again */ |   /* Preserve pages which may soon be dirtied again */ | ||||||
|   mdbx_pages_xkeep(mc, P_DIRTY, true); |   mdbx_pages_xkeep(mc, P_DIRTY, true); | ||||||
|  |  | ||||||
|  |   MDBX_dpl *const dl = mdbx_dpl_sort(txn->tw.dirtylist); | ||||||
|   /* Save the page IDs of all the pages we're flushing */ |   /* Save the page IDs of all the pages we're flushing */ | ||||||
|   /* flush from the tail forward, this saves a lot of shifting later on. */ |   /* flush from the tail forward, this saves a lot of shifting later on. */ | ||||||
|   int rc; |   size_t keep = dl->length; | ||||||
|   for (i = dl->length; i && need; i--) { |   for (; keep && spill; keep--) { | ||||||
|     pgno_t pn = dl->items[i].pgno << 1; |     pgno_t pn = dl->items[keep].pgno << 1; | ||||||
|     MDBX_page *dp = dl->items[i].ptr; |     MDBX_page *dp = dl->items[keep].ptr; | ||||||
|     if (dp->mp_flags & (P_LOOSE | P_KEEP)) |     if (dp->mp_flags & (P_LOOSE | P_KEEP)) | ||||||
|       continue; |       continue; | ||||||
|     /* Can't spill twice, |     /* Can't spill twice, | ||||||
| @@ -4381,17 +4388,17 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key, | |||||||
|     rc = mdbx_pnl_append(&txn->tw.spill_pages, pn); |     rc = mdbx_pnl_append(&txn->tw.spill_pages, pn); | ||||||
|     if (unlikely(rc != MDBX_SUCCESS)) |     if (unlikely(rc != MDBX_SUCCESS)) | ||||||
|       goto bailout; |       goto bailout; | ||||||
|     need--; |     spill--; | ||||||
|   } |   } | ||||||
|   mdbx_pnl_sort(txn->tw.spill_pages); |   mdbx_pnl_sort(txn->tw.spill_pages); | ||||||
|  |  | ||||||
|   /* Flush the spilled part of dirty list */ |   /* Flush the spilled part of dirty list */ | ||||||
|   rc = mdbx_page_flush(txn, i); |   rc = mdbx_page_flush(txn, keep); | ||||||
|   if (unlikely(rc != MDBX_SUCCESS)) |   if (unlikely(rc != MDBX_SUCCESS)) | ||||||
|     goto bailout; |     goto bailout; | ||||||
|  |  | ||||||
|   /* Reset any dirty pages we kept that page_flush didn't see */ |   /* Reset any dirty pages we kept that page_flush didn't see */ | ||||||
|   mdbx_pages_xkeep(mc, P_DIRTY | P_KEEP, i != 0); |   mdbx_pages_xkeep(mc, P_DIRTY | P_KEEP, keep > 0); | ||||||
|  |  | ||||||
| bailout: | bailout: | ||||||
|   txn->mt_flags |= rc ? MDBX_TXN_ERROR : MDBX_TXN_SPILLS; |   txn->mt_flags |= rc ? MDBX_TXN_ERROR : MDBX_TXN_SPILLS; | ||||||
|   | |||||||
| @@ -831,13 +831,10 @@ struct MDBX_txn { | |||||||
|   }; |   }; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /* Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. |  | ||||||
|  * At 4 keys per node, enough for 2^64 nodes, so there's probably no need to |  | ||||||
|  * raise this on a 64 bit machine. */ |  | ||||||
| #if MDBX_WORDBITS >= 64 | #if MDBX_WORDBITS >= 64 | ||||||
| #define CURSOR_STACK 28 | #define CURSOR_STACK 32 | ||||||
| #else | #else | ||||||
| #define CURSOR_STACK 20 | #define CURSOR_STACK 24 | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| struct MDBX_xcursor; | struct MDBX_xcursor; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user