From e7488bc30cf00cde227ea3413da88fc834e17393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 2 Aug 2024 12:12:29 +0300 Subject: [PATCH] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Ftxn=5Fcopy2fd()`=20=D0=B8=20?= =?UTF-8?q?`mdbx=5Ftxn=5Fcopy2pathname()`,=20=D0=B2=D0=BA=D0=BB=D1=8E?= =?UTF-8?q?=D1=87=D0=B0=D1=8F=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB=D0=BD=D0=B8?= =?UTF-8?q?=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B5=20=D0=BE=D0=BF=D1=86?= =?UTF-8?q?=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 98 ++++++++++++++++- src/copy.c | 313 +++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 325 insertions(+), 86 deletions(-) diff --git a/mdbx.h b/mdbx.h index 9ae8b798..0d04c73f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1713,7 +1713,24 @@ typedef enum MDBX_copy_flags { MDBX_CP_COMPACT = 1u, /** Force to make resizable copy, i.e. dynamic size instead of fixed */ - MDBX_CP_FORCE_DYNAMIC_SIZE = 2u + MDBX_CP_FORCE_DYNAMIC_SIZE = 2u, + + /** Don't explicitly flush the written data to an output media */ + MDBX_CP_DONT_FLUSH = 4u, + + /** Use read transaction parking during copying MVCC-snapshot + * \see mdbx_txn_park() */ + MDBX_CP_THROTTLE_MVCC = 8u, + + /** Abort/dispose passed transaction after copy + * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ + MDBX_CP_DISPOSE_TXN = 16u, + + /** Enable renew/restart read transaction in case it uses an outdated + * MVCC snapshot, otherwise the \ref MDBX_MVCC_RETARDED will be returned + * \see mdbx_txn_copy2fd() \see mdbx_txn_copy2pathname() */ + MDBX_CP_RENEW_TXN = 32u + } MDBX_copy_flags_t; DEFINE_ENUM_FLAG_OPERATORS(MDBX_copy_flags) @@ -1986,8 +2003,12 @@ typedef enum MDBX_error { * recycling old MVCC snapshots. 
*/ MDBX_OUSTED = -30411, + /** MVCC snapshot used by read transaction is outdated and could not be + * copied since the corresponding meta-pages were overwritten. */ + MDBX_MVCC_RETARDED = -30410, + /* The last of MDBX-added error codes */ - MDBX_LAST_ADDED_ERRCODE = MDBX_OUSTED, + MDBX_LAST_ADDED_ERRCODE = MDBX_MVCC_RETARDED, #if defined(_WIN32) || defined(_WIN64) MDBX_ENODATA = ERROR_HANDLE_EOF, @@ -2582,6 +2603,8 @@ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, * transaction. See long-lived transactions under \ref restrictions section. * * \note On Windows the \ref mdbx_env_copyW() is recommended to use. + * \see mdbx_env_copy2fd() + * \see mdbx_txn_copy2pathname() * * \param [in] env An environment handle returned by mdbx_env_create(). * It must have already been opened successfully. @@ -2608,12 +2631,56 @@ LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname, LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest, MDBX_copy_flags_t flags); +/** \brief Copy an MDBX environment by given read transaction to the specified + * path, with options. + * \ingroup c_extra + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * \note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under \ref restrictions section. + * + * \note On Windows the \ref mdbx_txn_copy2pathnameW() is recommended to use. + * \see mdbx_txn_copy2fd() + * \see mdbx_env_copy() + * + * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). + * \param [in] dest The pathname of a file in which the copy will reside. + * This file must not already exist, but the parent directory + * must be writable. + * \param [in] flags Specifies options for this operation. 
This parameter + * must be a bitwise OR of any of the constants + * described here: + * + * - \ref MDBX_CP_DEFAULTS + * Perform copy as-is without compaction, etc. + * + * - \ref MDBX_CP_COMPACT + * Perform compaction while copying: omit free pages and sequentially + * renumber all pages in output. This option consumes a little bit more + * CPU for processing, but may run more quickly than the default, on + * account of skipping free pages. + * + * - \ref MDBX_CP_FORCE_DYNAMIC_SIZE + * Force to make resizable copy, i.e. dynamic size instead of fixed. + * + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest, + MDBX_copy_flags_t flags); + #if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) /** \copydoc mdbx_env_copy() * \note Available only on Windows. * \see mdbx_env_copy() */ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, MDBX_copy_flags_t flags); + +/** \copydoc mdbx_txn_copy2pathname() + * \note Available only on Windows. + * \see mdbx_txn_copy2pathname() */ +LIBMDBX_API int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest, + MDBX_copy_flags_t flags); #endif /* Windows */ /** \brief Copy an environment to the specified file descriptor, with @@ -2623,6 +2690,7 @@ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, * This function may be used to make a backup of an existing environment. * No lockfile is created, since it gets recreated at need. * \see mdbx_env_copy() + * \see mdbx_txn_copy2fd() * * \note This call can trigger significant file size growth if run in * parallel with write transactions, because it employs a read-only @@ -2642,6 +2710,32 @@ LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest, LIBMDBX_API int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, MDBX_copy_flags_t flags); +/** \brief Copy an environment by given read transaction to the specified file + * descriptor, with options. 
+ * \ingroup c_extra + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * \see mdbx_txn_copy2pathname() + * \see mdbx_env_copy2fd() + * + * \note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under \ref restrictions + * section. + * + * \note Fails if the environment has suffered a page leak and the destination + * file descriptor is associated with a pipe, socket, or FIFO. + * + * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). + * \param [in] fd The file descriptor to write the copy to. It must have + * already been opened for Write access. + * \param [in] flags Special options for this operation. \see mdbx_env_copy() + * + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags); + /** \brief Statistics for a database in the environment * \ingroup c_statinfo * \see mdbx_env_stat_ex() \see mdbx_dbi_stat() */ diff --git a/src/copy.c b/src/copy.c index 4455bdf2..7802ae17 100644 --- a/src/copy.c +++ b/src/copy.c @@ -8,6 +8,7 @@ typedef struct compacting_context { MDBX_env *env; MDBX_txn *txn; + MDBX_copy_flags_t flags; pgno_t first_unallocated; osal_condpair_t condpair; volatile unsigned head; @@ -80,7 +81,11 @@ __cold static int compacting_toggle_write_buffers(ctx_t *ctx) { ctx->head += 1; osal_condpair_signal(&ctx->condpair, true); while (!ctx->error && ctx->head - ctx->tail == 2 /* both buffers in use */) { + if (ctx->flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(ctx->txn, false); int err = osal_condpair_wait(&ctx->condpair, false); + if (err == MDBX_SUCCESS && (ctx->flags & MDBX_CP_THROTTLE_MVCC) != 0) + err = mdbx_txn_unpark(ctx->txn, false); if (err != MDBX_SUCCESS) ctx->error = err; } @@ -362,7 +367,7 @@ __cold 
static void meta_make_sizeable(meta_t *meta) { } } -__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, +__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, uint8_t *buffer, const bool dest_is_pipe, const MDBX_copy_flags_t flags) { @@ -370,36 +375,40 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, uint8_t *const data_buffer = buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize); meta_t *const meta = meta_init_triplet(env, buffer); - meta_set_txnid(env, meta, read_txn->txnid); + meta_set_txnid(env, meta, txn->txnid); if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(meta); /* copy canary sequences if present */ - if (read_txn->canary.v) { - meta->canary = read_txn->canary; + if (txn->canary.v) { + meta->canary = txn->canary; meta->canary.v = constmeta_txnid(meta); } - if (read_txn->dbs[MAIN_DBI].root == P_INVALID) { + if (txn->dbs[MAIN_DBI].root == P_INVALID) { /* When the DB is empty, handle it specially to * fix any breakage like page leaks from ITS#8174. */ - meta->trees.main.flags = read_txn->dbs[MAIN_DBI].flags; + meta->trees.main.flags = txn->dbs[MAIN_DBI].flags; compacting_fixup_meta(env, meta); if (dest_is_pipe) { + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); int rc = osal_write(fd, buffer, meta_bytes); + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_THROTTLE_MVCC) != 0) + rc = mdbx_txn_unpark(txn, false); if (unlikely(rc != MDBX_SUCCESS)) return rc; } } else { /* Count free pages + GC pages. 
*/ cursor_couple_t couple; - int rc = cursor_init(&couple.outer, read_txn, FREE_DBI); + int rc = cursor_init(&couple.outer, txn, FREE_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - pgno_t gc_npages = read_txn->dbs[FREE_DBI].branch_pages + - read_txn->dbs[FREE_DBI].leaf_pages + - read_txn->dbs[FREE_DBI].large_pages; + pgno_t gc_npages = txn->dbs[FREE_DBI].branch_pages + + txn->dbs[FREE_DBI].leaf_pages + + txn->dbs[FREE_DBI].large_pages; MDBX_val key, data; rc = outer_first(&couple.outer, &key, &data); while (rc == MDBX_SUCCESS) { @@ -410,7 +419,7 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, "invalid GC-record length", data.iov_len); return MDBX_CORRUPTED; } - if (unlikely(!pnl_check(pnl, read_txn->geo.first_unallocated))) { + if (unlikely(!pnl_check(pnl, txn->geo.first_unallocated))) { ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-record content"); return MDBX_CORRUPTED; @@ -421,9 +430,8 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, if (unlikely(rc != MDBX_NOTFOUND)) return rc; - meta->geometry.first_unallocated = - read_txn->geo.first_unallocated - gc_npages; - meta->trees.main = read_txn->dbs[MAIN_DBI]; + meta->geometry.first_unallocated = txn->geo.first_unallocated - gc_npages; + meta->trees.main = txn->dbs[MAIN_DBI]; ctx_t ctx; memset(&ctx, 0, sizeof(ctx)); @@ -437,16 +445,21 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, ctx.first_unallocated = NUM_METAS; ctx.env = env; ctx.fd = fd; - ctx.txn = read_txn; + ctx.txn = txn; + ctx.flags = flags; osal_thread_t thread; int thread_err = osal_thread_create(&thread, compacting_write_thread, &ctx); if (likely(thread_err == MDBX_SUCCESS)) { if (dest_is_pipe) { if (!meta->trees.main.mod_txnid) - meta->trees.main.mod_txnid = read_txn->txnid; + meta->trees.main.mod_txnid = txn->txnid; compacting_fixup_meta(env, meta); + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); rc = osal_write(fd, 
buffer, meta_bytes); + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_THROTTLE_MVCC) != 0) + rc = mdbx_txn_unpark(txn, false); } if (likely(rc == MDBX_SUCCESS)) rc = compacting_walk_tree(&ctx, &meta->trees.main); @@ -495,6 +508,9 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, compacting_fixup_meta(env, meta); } + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); + /* Extend file if required */ if (meta->geometry.now != meta->geometry.first_unallocated) { const size_t whole_size = pgno2bytes(env, meta->geometry.now); @@ -516,46 +532,78 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *read_txn, return MDBX_SUCCESS; } -__cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, - mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe, +//---------------------------------------------------------------------------- + +__cold static int copy_asis(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, + uint8_t *buffer, const bool dest_is_pipe, const MDBX_copy_flags_t flags) { - int rc = txn_end(read_txn, TXN_END_RESET_TMP); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - /* Temporarily block writers until we snapshot the meta pages */ - rc = lck_txn_lock(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - rc = txn_renew(read_txn, MDBX_TXN_RDONLY); - if (unlikely(rc != MDBX_SUCCESS)) { - lck_txn_unlock(env); - return rc; + bool should_unlock = false; + if ((txn->flags & MDBX_TXN_RDONLY) != 0 && (flags & MDBX_CP_RENEW_TXN) != 0) { + /* Try temporarily block writers until we snapshot the meta pages */ + int err = lck_txn_lock(env, true); + if (likely(err == MDBX_SUCCESS)) + should_unlock = true; + else if (unlikely(err != MDBX_BUSY)) + return err; } jitter4testing(false); + int rc = MDBX_SUCCESS; const size_t meta_bytes = pgno2bytes(env, NUM_METAS); - const troika_t troika = meta_tap(env); + troika_t troika = meta_tap(env); /* Make a snapshot of meta-pages, * but writing ones 
after the data was flushed */ +retry_snap_meta: memcpy(buffer, env->dxb_mmap.base, meta_bytes); - meta_t *const headcopy = /* LY: get pointer to the snapshot copy */ - ptr_disp(buffer, - ptr_dist(meta_recent(env, &troika).ptr_c, env->dxb_mmap.base)); - lck_txn_unlock(env); + const meta_ptr_t recent = meta_recent(env, &troika); + meta_t *headcopy = /* LY: get pointer to the snapshot copy */ + ptr_disp(buffer, ptr_dist(recent.ptr_c, env->dxb_mmap.base)); + jitter4testing(false); + if (txn->flags & MDBX_TXN_RDONLY) { + if (recent.txnid != txn->txnid) { + if (flags & MDBX_CP_RENEW_TXN) + rc = mdbx_txn_renew(txn); + else { + rc = MDBX_MVCC_RETARDED; + for (size_t n = 0; n < NUM_METAS; ++n) { + meta_t *const meta = page_meta(ptr_disp(buffer, pgno2bytes(env, n))); + if (troika.txnid[n] == txn->txnid && + ((/* is_steady */ (troika.fsm >> n) & 1) || rc != MDBX_SUCCESS)) { + rc = MDBX_SUCCESS; + headcopy = meta; + } else if (troika.txnid[n] > txn->txnid) + meta_set_txnid(env, meta, 0); + } + } + } + if (should_unlock) + lck_txn_unlock(env); + else { + troika_t snap = meta_tap(env); + if (memcmp(&troika, &snap, sizeof(troika_t)) && rc == MDBX_SUCCESS) { + troika = snap; + goto retry_snap_meta; + } + } + } + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (txn->flags & MDBX_TXN_RDONLY) + eASSERT(env, meta_txnid(headcopy) == txn->txnid); if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(headcopy); /* Update signature to steady */ meta_sign_as_steady(headcopy); /* Copy the data */ - const size_t whole_size = pgno_align2os_bytes(env, read_txn->geo.end_pgno); - const size_t used_size = pgno2bytes(env, read_txn->geo.first_unallocated); + const size_t whole_size = pgno_align2os_bytes(env, txn->geo.end_pgno); + const size_t used_size = pgno2bytes(env, txn->geo.first_unallocated); jitter4testing(false); + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); + if (dest_is_pipe) rc = osal_write(fd, buffer, meta_bytes); @@ -570,7 +618,14 @@ __cold static int 
copy_asis(MDBX_env *env, MDBX_txn *read_txn, /* avoid use copyfilerange_unavailable() to ecryptfs due bugs */ not_the_same_filesystem = true; #endif /* MDBX_USE_COPYFILERANGE */ + for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) { + if (flags & MDBX_CP_THROTTLE_MVCC) { + rc = mdbx_txn_unpark(txn, false); + if (unlikely(rc != MDBX_SUCCESS)) + break; + } + #if MDBX_USE_SENDFILE static bool sendfile_unavailable; if (dest_is_pipe && likely(!sendfile_unavailable)) { @@ -579,6 +634,8 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, sendfile(fd, env->lazy_fd, &in_offset, used_size - offset); if (likely(written > 0)) { offset = in_offset; + if (flags & MDBX_CP_THROTTLE_MVCC) + rc = mdbx_txn_park(txn, false); continue; } rc = MDBX_ENODATA; @@ -596,6 +653,8 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, env->lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0); if (likely(bytes_copied > 0)) { offset = in_offset; + if (flags & MDBX_CP_THROTTLE_MVCC) + rc = mdbx_txn_park(txn, false); continue; } rc = MDBX_ENODATA; @@ -619,6 +678,8 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, : used_size - offset; /* copy to avoid EFAULT in case swapped-out */ memcpy(data_buffer, ptr_disp(env->dxb_mmap.base, offset), chunk); + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, false); rc = osal_write(fd, data_buffer, chunk); offset += chunk; } @@ -644,11 +705,22 @@ __cold static int copy_asis(MDBX_env *env, MDBX_txn *read_txn, return rc; } -__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, - MDBX_copy_flags_t flags) { - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; +//---------------------------------------------------------------------------- + +__cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + if (unlikely(txn->flags & MDBX_TXN_DIRTY)) + return MDBX_BAD_TXN; + + int rc = MDBX_SUCCESS; + if (txn->flags & 
MDBX_TXN_RDONLY) { + if (flags & MDBX_CP_THROTTLE_MVCC) { + rc = mdbx_txn_park(txn, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + } else if (unlikely(flags & (MDBX_CP_THROTTLE_MVCC | MDBX_CP_RENEW_TXN))) + return MDBX_EINVAL; const int dest_is_pipe = osal_is_pipe(fd); if (MDBX_IS_ERROR(dest_is_pipe)) @@ -660,6 +732,7 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return rc; } + MDBX_env *const env = txn->env; const size_t buffer_size = pgno_align2os_bytes(env, NUM_METAS) + ceil_powerof2(((flags & MDBX_CP_COMPACT) @@ -672,15 +745,6 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, if (unlikely(rc != MDBX_SUCCESS)) return rc; - MDBX_txn *read_txn = nullptr; - /* Do the lock/unlock of the reader mutex before starting the - * write txn. Otherwise other read txns could block writers. */ - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &read_txn); - if (unlikely(rc != MDBX_SUCCESS)) { - osal_memalign_free(buffer); - return rc; - } - if (!dest_is_pipe) { /* Firstly write a stub to meta-pages. * Now we sure to incomplete copy will not be used. */ @@ -688,22 +752,31 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, rc = osal_write(fd, buffer, pgno2bytes(env, NUM_METAS)); } + if (likely(rc == MDBX_SUCCESS)) + rc = mdbx_txn_unpark(txn, false); if (likely(rc == MDBX_SUCCESS)) { memset(buffer, 0, pgno2bytes(env, NUM_METAS)); rc = ((flags & MDBX_CP_COMPACT) ? 
copy_with_compacting : copy_asis)( - env, read_txn, fd, buffer, dest_is_pipe, flags); + env, txn, fd, buffer, dest_is_pipe, flags); + + if (likely(rc == MDBX_SUCCESS)) + rc = mdbx_txn_unpark(txn, false); } - mdbx_txn_abort(read_txn); + + if (flags & MDBX_CP_THROTTLE_MVCC) + mdbx_txn_park(txn, true); + else if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_reset(txn); if (!dest_is_pipe) { - if (likely(rc == MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0) rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE); /* Write actual meta */ if (likely(rc == MDBX_SUCCESS)) rc = osal_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); - if (likely(rc == MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0) rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ); } @@ -711,38 +784,20 @@ __cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return rc; } -__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, - MDBX_copy_flags_t flags) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *dest_pathW = nullptr; - int rc = osal_mb2w(dest_path, &dest_pathW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_copyW(env, dest_pathW, flags); - osal_free(dest_pathW); - } - return rc; -} - -__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, - MDBX_copy_flags_t flags) { -#endif /* Windows */ - - int rc = check_env(env, true); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(!dest_path)) +__cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, + MDBX_copy_flags_t flags) { + if (unlikely(!dest_path || *dest_path == '\0')) return MDBX_EINVAL; /* The destination path must exist, but the destination file must not. * We don't want the OS to cache the writes, since the source data is * already in the OS cache. 
*/ - mdbx_filehandle_t newfd; - rc = osal_openfile(MDBX_OPEN_COPY, env, dest_path, &newfd, + mdbx_filehandle_t newfd = INVALID_HANDLE_VALUE; + int rc = osal_openfile(MDBX_OPEN_COPY, txn->env, dest_path, &newfd, #if defined(_WIN32) || defined(_WIN64) - (mdbx_mode_t)-1 + (mdbx_mode_t)-1 #else - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP #endif ); @@ -767,7 +822,7 @@ __cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, #endif /* Windows / POSIX */ if (rc == MDBX_SUCCESS) - rc = mdbx_env_copy2fd(env, newfd, flags); + rc = copy2fd(txn, newfd, flags); if (newfd != INVALID_HANDLE_VALUE) { int err = osal_closefile(newfd); @@ -776,6 +831,96 @@ __cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, if (rc != MDBX_SUCCESS) (void)osal_removefile(dest_path); } - + return rc; +} + +//---------------------------------------------------------------------------- + +__cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2fd(txn, fd, flags); + if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_abort(txn); + return rc; +} + +__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, + MDBX_copy_flags_t flags) { + if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) + return MDBX_EINVAL; + + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + MDBX_txn *txn = nullptr; + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2fd(txn, fd, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); + mdbx_txn_abort(txn); + return rc; +} + +__cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, + MDBX_copy_flags_t flags) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *dest_pathW = nullptr; + int rc = osal_mb2w(dest_path, &dest_pathW); + if (likely(rc == MDBX_SUCCESS)) { 
+ rc = mdbx_txn_copy2pathnameW(txn, dest_pathW, flags); + osal_free(dest_pathW); + } + return rc; +} + +__cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, + MDBX_copy_flags_t flags) { +#endif /* Windows */ + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2pathname(txn, dest_path, flags); + if (flags & MDBX_CP_DISPOSE_TXN) + mdbx_txn_abort(txn); + return rc; +} + +__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, + MDBX_copy_flags_t flags) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *dest_pathW = nullptr; + int rc = osal_mb2w(dest_path, &dest_pathW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_copyW(env, dest_pathW, flags); + osal_free(dest_pathW); + } + return rc; +} + +__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, + MDBX_copy_flags_t flags) { +#endif /* Windows */ + if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN))) + return MDBX_EINVAL; + + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + MDBX_txn *txn = nullptr; + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = copy2pathname(txn, dest_path, + flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN); + mdbx_txn_abort(txn); return rc; }