/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024

#include "internals.h"

/* Suggests a reasonable upper bound for a database size, derived from the
 * amount of RAM reported by mdbx_get_sysraminfo().
 *
 * The value is computed on the first call and kept in a function-local
 * static; a zero value of that static means "not computed yet", so later
 * calls just return the cached number.
 *
 * Returns:
 *  - MAX_MAPSIZE32 if mdbx_get_sysraminfo() fails;
 *  - MAX_MAPSIZE if twice the RAM size would exceed MAX_MAPSIZE;
 *  - otherwise 207/128 (about 1.617) of the RAM size, rounded to
 *    progressively coarser units while the rounding error stays small. */
__cold static intptr_t reasonable_db_maxsize(void) {
  static intptr_t cached_result;
  if (cached_result == 0) {
    intptr_t pagesize, total_ram_pages;
    if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != MDBX_SUCCESS))
      /* the 32-bit limit is good enough for fallback */
      return cached_result = MAX_MAPSIZE32;

    /* Compare in page units (dividing the limit instead of multiplying the
     * operands) so the check itself cannot overflow. */
    if (unlikely((size_t)total_ram_pages * 2 > MAX_MAPSIZE / (size_t)pagesize))
      return cached_result = MAX_MAPSIZE;
    assert(MAX_MAPSIZE >= (size_t)(total_ram_pages * pagesize * 2));

    /* Suggesting should not be more than golden ratio of the size of RAM:
     * 207 >> 7 is 207/128, i.e. approximately 1.617. */
    cached_result = (intptr_t)((size_t)total_ram_pages * 207 >> 7) * pagesize;

    /* Round to the nearest human-readable granulation.
     * The unit starts at MEGABYTE and is multiplied by 32 on every pass; the
     * loop also ends once the shift overflows the unit to zero. */
    for (size_t unit = MEGABYTE; unit; unit <<= 5) {
      const size_t floor = floor_powerof2(cached_result, unit);
      const size_t ceil = ceil_powerof2(cached_result, unit);
      /* Maximal acceptable rounding error: 1/16 of the current value. */
      const size_t threshold = (size_t)cached_result >> 4;
      /* Round down when the lower bound is strictly closer, or when rounding
       * up would exceed MAX_MAPSIZE. */
      const bool down = cached_result - floor < ceil - cached_result || ceil > MAX_MAPSIZE;
      /* Stop before applying a rounding that changes the value too much. */
      if (threshold < (down ? cached_result - floor : ceil - cached_result))
        break;
      cached_result = down ? floor : ceil;
    }
  }
  return cached_result;
}

/* Verifies that no file exists at `lck_pathname`.
 *
 * Returns the value of osal_fileexists() unchanged, except that
 * MDBX_RESULT_TRUE (the file exists) is replaced by MDBX_DUPLICATED_CLK.
 * Any result other than MDBX_RESULT_FALSE is logged as an error.
 *
 * NOTE(review): the caller below propagates this value and its own callers
 * compare it with MDBX_SUCCESS, so MDBX_RESULT_FALSE is presumably equal to
 * MDBX_SUCCESS - confirm against the public header. */
__cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) {
  int err = osal_fileexists(lck_pathname);
  if (unlikely(err != MDBX_RESULT_FALSE)) {
    if (err == MDBX_RESULT_TRUE)
      err = MDBX_DUPLICATED_CLK;
    ERROR("Alternative/Duplicate LCK-file '%" MDBX_PRIsPATH "' error %d", lck_pathname, err);
  }
  return err;
}

/* Translates the user-supplied `pathname` into the three paths kept in
 * env->pathname (specified, dxb, lck), all stored in one heap buffer
 * referenced by env->pathname.buffer.
 *
 * Steps:
 *  1. env->pathname is zeroed; a null or empty `pathname` gives MDBX_EINVAL.
 *  2. The path is probed (GetFileAttributesW() on Windows, stat() elsewhere).
 *     - If it does not exist: any error other than MDBX_ENOFILE is returned
 *       as is; MDBX_ENOFILE is returned when `mode` is 0 or MDBX_RDONLY is
 *       set; otherwise a directory is created unless MDBX_NOSUBDIR is set.
 *     - If it exists: the passed MDBX_NOSUBDIR flag is ignored and the flag
 *       is set only when the path is not a directory.
 *  3. With MDBX_NOSUBDIR set, a pathname that ends with the data-file name
 *     (MDBX_DATANAME) is treated as "directory + default file name": the
 *     name is cut off the base and MDBX_NOSUBDIR is cleared.
 *  4. The buffer is allocated and the dxb/lck paths are composed. Before that
 *     the name of the lock file as it would be under the other naming scheme
 *     is built in the same buffer and must not exist.
 *
 * Returns MDBX_SUCCESS, MDBX_EINVAL, MDBX_ENOMEM, a system error code from
 * the probe/creation calls, or the result of check_alternative_lck_absent().
 * When only that last check fails, the buffer stays allocated and all three
 * paths are still filled in; mdbx_env_delete() in this file, for example,
 * frees env->pathname.buffer regardless of the returned code.
 *
 * May modify env->flags (MDBX_NOSUBDIR) as a side effect. */
__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, const mdbx_mode_t mode) {
  memset(&env->pathname, 0, sizeof(env->pathname));
  if (unlikely(!pathname || !*pathname))
    return MDBX_EINVAL;

  int rc;
#if defined(_WIN32) || defined(_WIN64)
  const DWORD dwAttrib = GetFileAttributesW(pathname);
  if (dwAttrib == INVALID_FILE_ATTRIBUTES) {
    rc = GetLastError();
    if (rc != MDBX_ENOFILE)
      return rc;
    if (mode == 0 || (env->flags & MDBX_RDONLY) != 0)
      /* can't open non-existing */
      return rc;

    /* auto-create directory if requested */
    if ((env->flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathname, nullptr)) {
      rc = GetLastError();
      if (rc != ERROR_ALREADY_EXISTS)
        return rc;
    }
  } else {
    /* ignore passed MDBX_NOSUBDIR flag and set it automatically;
     * the subtraction clears the bit that has just been set */
    env->flags |= MDBX_NOSUBDIR;
    if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)
      env->flags -= MDBX_NOSUBDIR;
  }
#else
  struct stat st;
  if (stat(pathname, &st) != 0) {
    rc = errno;
    if (rc != MDBX_ENOFILE)
      return rc;
    if (mode == 0 || (env->flags & MDBX_RDONLY) != 0)
      /* can't open non-existing */
      return rc /* MDBX_ENOFILE */;

    /* auto-create directory if requested */
    const mdbx_mode_t dir_mode =
        (/* inherit read/write permissions for group and others */ mode & (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) |
        /* always add read/write/search for owner */ S_IRWXU |
        ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) |
        ((mode & S_IROTH) ? /* +search if readable by others */ S_IXOTH : 0);
    if ((env->flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) {
      rc = errno;
      if (rc != EEXIST)
        return rc;
    }
  } else {
    /* ignore passed MDBX_NOSUBDIR flag and set it automatically;
     * the subtraction clears the bit that has just been set */
    env->flags |= MDBX_NOSUBDIR;
    if (S_ISDIR(st.st_mode))
      env->flags -= MDBX_NOSUBDIR;
  }
#endif

  static const pathchar_t dxb_name[] = MDBX_DATANAME;
  static const pathchar_t lck_name[] = MDBX_LOCKNAME;
  static const pathchar_t lock_suffix[] = MDBX_LOCK_SUFFIX;

#if defined(_WIN32) || defined(_WIN64)
  assert(dxb_name[0] == '\\' && lck_name[0] == '\\');
  const size_t pathname_len = wcslen(pathname);
#else
  assert(dxb_name[0] == '/' && lck_name[0] == '/');
  const size_t pathname_len = strlen(pathname);
#endif
  assert(!osal_isdirsep(lock_suffix[0]));
  /* base_len is the length (in characters) of the pathname prefix shared by
   * the dxb and lck paths. */
  size_t base_len = pathname_len;
  static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1;
  if (env->flags & MDBX_NOSUBDIR) {
    if (base_len > dxb_name_len && osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, dxb_name_len)) {
      /* pathname is "<dir><separator><data-file name>": use <dir> as base */
      env->flags -= MDBX_NOSUBDIR;
      base_len -= dxb_name_len;
    } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && osal_isdirsep(lck_name[0]) &&
               osal_pathequal(pathname + base_len - dxb_name_len + 1, dxb_name + 1, dxb_name_len - 1)) {
      /* pathname is exactly the data-file name without the leading
       * separator: the base becomes empty */
      env->flags -= MDBX_NOSUBDIR;
      base_len -= dxb_name_len - 1;
    }
  }

  /* Size the buffer (in bytes) for: the specified pathname with terminator,
   * two copies of the base, and the larger of the two possible suffix sets. */
  const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t);
  const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name);
  const size_t enough4any =
      (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) ? suflen_with_NOSUBDIR : suflen_without_NOSUBDIR;
  const size_t bytes_needed = sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any;
  env->pathname.buffer = osal_malloc(bytes_needed);
  if (!env->pathname.buffer)
    return MDBX_ENOMEM;

  /* Buffer layout: [specified][dxb][lck]. */
  env->pathname.specified = env->pathname.buffer;
  env->pathname.dxb = env->pathname.specified + pathname_len + 1;
  env->pathname.lck = env->pathname.dxb + base_len + dxb_name_len + 1;
  rc = MDBX_SUCCESS;
  /* The start of the buffer is first used as scratch space for the name of
   * the alternative lock file; it is overwritten with the specified pathname
   * by the last memcpy() of this function. */
  pathchar_t *const buf = env->pathname.buffer;
  if (base_len) {
    memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len);
    if (env->flags & MDBX_NOSUBDIR) {
      /* Alternative name: the pathname with its extension replaced by (or,
       * if it has none, extended with) the extension of lck_name. The copy
       * runs up to ARRAY_END(lck_name), presumably including the terminator.
       * No check is made when lck_name has no extension. */
      const pathchar_t *const lck_ext = osal_fileext(lck_name, ARRAY_LENGTH(lck_name));
      if (lck_ext) {
        pathchar_t *pathname_ext = osal_fileext(buf, pathname_len);
        memcpy(pathname_ext ? pathname_ext : buf + pathname_len, lck_ext,
               sizeof(pathchar_t) * (ARRAY_END(lck_name) - lck_ext));
        rc = check_alternative_lck_absent(buf);
      }
    } else {
      /* Alternative name: base + data-file name + lock suffix. */
      memcpy(buf + base_len, dxb_name, sizeof(dxb_name));
      memcpy(buf + base_len + dxb_name_len, lock_suffix, sizeof(lock_suffix));
      rc = check_alternative_lck_absent(buf);
    }

    /* Compose the effective paths; rc from the check above is kept as is. */
    memcpy(env->pathname.dxb, pathname, sizeof(pathchar_t) * (base_len + 1));
    memcpy(env->pathname.lck, pathname, sizeof(pathchar_t) * base_len);
    if (env->flags & MDBX_NOSUBDIR) {
      memcpy(env->pathname.lck + base_len, lock_suffix, sizeof(lock_suffix));
    } else {
      memcpy(env->pathname.dxb + base_len, dxb_name, sizeof(dxb_name));
      memcpy(env->pathname.lck + base_len, lck_name, sizeof(lck_name));
    }
  } else {
    /* Empty base: reachable only through the second stripping branch above,
     * which has cleared MDBX_NOSUBDIR. File names are used without their
     * leading separator. */
    assert(!(env->flags & MDBX_NOSUBDIR));
    memcpy(buf, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t));
    memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix));
    rc = check_alternative_lck_absent(buf);

    memcpy(env->pathname.dxb, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t));
    memcpy(env->pathname.lck, lck_name + 1, sizeof(lck_name) - sizeof(pathchar_t));
  }

  /* Finally store the pathname exactly as given, with its terminator. */
  memcpy(env->pathname.specified, pathname, sizeof(pathchar_t) * (pathname_len + 1));
  return rc;
}
/*----------------------------------------------------------------------------*/

/* Allocates and initializes a new environment handle.
 *
 * Before allocating anything the function verifies that:
 *  - `penv` is not null (MDBX_EINVAL otherwise);
 *  - with MDBX_HAVE_C11ATOMICS, 32-bit (and with MDBX_64BIT_ATOMIC also
 *    64-bit) atomics are lock-free;
 *  - the system page size is a power of two and not less than
 *    MDBX_MIN_PAGESIZE;
 *  - on Linux, the kernel version is at least 4.0.
 * Any of the last three failing gives MDBX_INCOMPATIBLE.
 *
 * On success the handle is stored into `*penv` and MDBX_SUCCESS is returned.
 * On any failure `*penv` is left as nullptr and an error code is returned. */
__cold int mdbx_env_create(MDBX_env **penv) {
  if (unlikely(!penv))
    return LOG_IFERR(MDBX_EINVAL);
  *penv = nullptr;

#ifdef MDBX_HAVE_C11ATOMICS
  if (unlikely(!atomic_is_lock_free((const volatile uint32_t *)penv))) {
    ERROR("lock-free atomic ops for %u-bit types is required", 32);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
#if MDBX_64BIT_ATOMIC
  if (unlikely(!atomic_is_lock_free((const volatile uint64_t *)penv))) {
    ERROR("lock-free atomic ops for %u-bit types is required", 64);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
#endif /* MDBX_64BIT_ATOMIC */
#endif /* MDBX_HAVE_C11ATOMICS */

  if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) {
    ERROR("unsuitable system pagesize %u", globals.sys_pagesize);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }

#if defined(__linux__) || defined(__gnu_linux__)
  /* The version is packed one byte per component, so 0x04000000 is 4.0.0.0. */
  if (unlikely(globals.linux_kernel_version < 0x04000000)) {
    /* 2022-09-01: More than two years have already passed since the end of
     * any support for the longest-lived kernel of the 3.x branch, 3.16.85 */
    ERROR("too old linux kernel %u.%u.%u.%u, the >= 4.0.0 is required", globals.linux_kernel_version >> 24,
          (globals.linux_kernel_version >> 16) & 255, (globals.linux_kernel_version >> 8) & 255,
          globals.linux_kernel_version & 255);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
#endif /* Linux */

  MDBX_env *env = osal_calloc(1, sizeof(MDBX_env));
  if (unlikely(!env))
    return LOG_IFERR(MDBX_ENOMEM);

  env->max_readers = DEFAULT_READERS;
  env->max_dbi = env->n_dbi = CORE_DBS;
  env->lazy_fd = env->dsync_fd = env->fd4meta = env->lck_mmap.fd = INVALID_HANDLE_VALUE;
  env->stuck_meta = -1;

  env_options_init(env);
  /* Start with the system page size, capped by MDBX_MAX_PAGESIZE. */
  env_setup_pagesize(env, (globals.sys_pagesize < MDBX_MAX_PAGESIZE) ? globals.sys_pagesize : MDBX_MAX_PAGESIZE);

  int rc = osal_fastmutex_init(&env->dbi_lock);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

#if defined(_WIN32) || defined(_WIN64)
  imports.srwl_Init(&env->remap_guard);
  InitializeCriticalSection(&env->windowsbug_lock);
#else
  rc = osal_fastmutex_init(&env->remap_guard);
  if (unlikely(rc != MDBX_SUCCESS)) {
    osal_fastmutex_destroy(&env->dbi_lock);
    goto bailout;
  }

#if MDBX_LOCKING > MDBX_LOCKING_SYSV
  lck_t *const stub = lckless_stub(env);
  rc = lck_ipclock_stubinit(&stub->wrt_lock);
#endif /* MDBX_LOCKING */
  /* Without the stub initialization above rc still holds MDBX_SUCCESS here. */
  if (unlikely(rc != MDBX_SUCCESS)) {
    osal_fastmutex_destroy(&env->remap_guard);
    osal_fastmutex_destroy(&env->dbi_lock);
    goto bailout;
  }
#endif /* Windows */

  VALGRIND_CREATE_MEMPOOL(env, 0, 0);
  /* The signature is set last: it is what marks the handle as valid. */
  env->signature.weak = env_signature;
  *penv = env;
  return MDBX_SUCCESS;

bailout:
  osal_free(env);
  return LOG_IFERR(rc);
}

/* Rewrites meta-page number `target` with a transaction id that is greater
 * than the ids of all other valid meta-pages.
 *
 * Requirements: `target` < NUM_METAS (else MDBX_EINVAL), check_env() passes,
 * and the environment flags contain MDBX_EXCLUSIVE without MDBX_RDONLY (else
 * MDBX_EPERM).
 *
 * Every other meta-page is copied and validated: an invalid one is handed to
 * meta_override() with a zero txnid and no source (presumably wiping it -
 * confirm against meta_override()); a valid one only raises the new txnid.
 * Returns MDBX_TXN_FULL if the resulting txnid exceeds MAX_TXNID, otherwise
 * the result of the final meta_override() for the target. */
__cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) {
  if (unlikely(target >= NUM_METAS))
    return LOG_IFERR(MDBX_EINVAL);
  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_EXCLUSIVE))
    return LOG_IFERR(MDBX_EPERM);

  const meta_t *const target_meta = METAPAGE(env, target);
  txnid_t new_txnid = constmeta_txnid(target_meta);
  if (new_txnid < MIN_TXNID)
    new_txnid = MIN_TXNID;
  for (unsigned n = 0; n < NUM_METAS; ++n) {
    if (n == target)
      continue;
    page_t *const page = pgno2page(env, n);
    /* validate a private copy of the meta rather than the page itself */
    meta_t meta = *page_meta(page);
    if (meta_validate(env, &meta, page, n, nullptr) != MDBX_SUCCESS) {
      int err = meta_override(env, n, 0, nullptr);
      if (unlikely(err != MDBX_SUCCESS))
        return LOG_IFERR(err);
    } else {
      txnid_t txnid = constmeta_txnid(&meta);
      if (new_txnid <= txnid)
        new_txnid = safe64_txnid_next(txnid);
    }
  }

  if (unlikely(new_txnid > MAX_TXNID)) {
    ERROR("txnid overflow, raise %d", MDBX_TXN_FULL);
    return LOG_IFERR(MDBX_TXN_FULL);
  }
  return LOG_IFERR(meta_override(env, target, new_txnid, target_meta));
}

/* Opens the environment pinned to meta-page number `target_meta`.
 *
 * The meta-page number is stored into env->stuck_meta and the environment is
 * then opened with MDBX_EXCLUSIVE (plus MDBX_RDONLY unless `writeable`) and a
 * zero mode. Returns MDBX_EINVAL for an out-of-range `target_meta`, the error
 * of check_env(), or MDBX_EPERM if the data file is already mapped.
 *
 * On Windows this narrow-character entry point only converts `pathname` with
 * osal_mb2w() and forwards to the ...W variant; the #if section below closes
 * the wrapper and opens the W function, so the code after it is the body of
 * whichever function is being compiled. */
__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, unsigned target_meta, bool writeable) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *pathnameW = nullptr;
  int rc = osal_mb2w(pathname, &pathnameW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_env_open_for_recoveryW(env, pathnameW, target_meta, writeable);
    osal_free(pathnameW);
  }
  return LOG_IFERR(rc);
}

__cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, unsigned target_meta, bool writeable) {
#endif /* Windows */

  if (unlikely(target_meta >= NUM_METAS))
    return LOG_IFERR(MDBX_EINVAL);
  int rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);
  if (unlikely(env->dxb_mmap.base))
    return LOG_IFERR(MDBX_EPERM);

  env->stuck_meta = (int8_t)target_meta;
  return
#if defined(_WIN32) || defined(_WIN64)
      mdbx_env_openW
#else
      mdbx_env_open
#endif /* Windows */
      (env, pathname, writeable ? MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, 0);
}

/* Deletes the files of an environment located at `pathname`: the data file,
 * the lock file and, when a sub-directory layout is detected, the directory
 * itself (never "." or "..").
 *
 * `mode` must be one of MDBX_ENV_JUST_DELETE, MDBX_ENV_ENSURE_UNUSED or
 * MDBX_ENV_WAIT_FOR_UNUSED, otherwise MDBX_EINVAL is returned. For the last
 * two modes both files are opened and locked first; the second argument of
 * osal_lockfile() is true only for MDBX_ENV_WAIT_FOR_UNUSED.
 *
 * Returns MDBX_SUCCESS if at least one item was removed, MDBX_RESULT_TRUE if
 * there was nothing to remove, or an error code. A missing file or directory
 * (MDBX_ENOFILE) is not treated as an error.
 *
 * On Windows the narrow-character entry point converts `pathname` and
 * forwards to mdbx_env_deleteW(), whose body is the code after the #if. */
__cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *pathnameW = nullptr;
  int rc = osal_mb2w(pathname, &pathnameW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_env_deleteW(pathnameW, mode);
    osal_free(pathnameW);
  }
  return LOG_IFERR(rc);
}

__cold int mdbx_env_deleteW(const wchar_t *pathname, MDBX_env_delete_mode_t mode) {
#endif /* Windows */

  switch (mode) {
  default:
    return LOG_IFERR(MDBX_EINVAL);
  case MDBX_ENV_JUST_DELETE:
  case MDBX_ENV_ENSURE_UNUSED:
  case MDBX_ENV_WAIT_FOR_UNUSED:
    break;
  }

  /* A temporary zeroed environment is used only to resolve the paths and to
   * pass to osal_openfile(). */
#ifdef __e2k__ /* https://bugs.mcst.ru/bugzilla/show_bug.cgi?id=6011 */
  MDBX_env *const dummy_env = alloca(sizeof(MDBX_env));
#else
  MDBX_env dummy_env_silo, *const dummy_env = &dummy_env_silo;
#endif
  memset(dummy_env, 0, sizeof(*dummy_env));
  dummy_env->flags = (mode == MDBX_ENV_ENSURE_UNUSED) ? MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS;
  dummy_env->ps = (unsigned)mdbx_default_pagesize();

  STATIC_ASSERT(sizeof(dummy_env->flags) == sizeof(MDBX_env_flags_t));
  /* rc tracks whether anything has been removed; err tracks failures. */
  int rc = MDBX_RESULT_TRUE, err = env_handle_pathname(dummy_env, pathname, 0);
  if (likely(err == MDBX_SUCCESS)) {
    mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, dxb_handle = INVALID_HANDLE_VALUE;
    if (mode > MDBX_ENV_JUST_DELETE) {
      /* Open and lock whichever of the two files exist. */
      err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.dxb, &dxb_handle, 0);
      err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err;
      if (err == MDBX_SUCCESS) {
        err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.lck, &clk_handle, 0);
        err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err;
      }
      if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE)
        err = osal_lockfile(clk_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED);
      if (err == MDBX_SUCCESS && dxb_handle != INVALID_HANDLE_VALUE)
        err = osal_lockfile(dxb_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED);
    }

    if (err == MDBX_SUCCESS) {
      err = osal_removefile(dummy_env->pathname.dxb);
      if (err == MDBX_SUCCESS)
        rc = MDBX_SUCCESS;
      else if (err == MDBX_ENOFILE)
        err = MDBX_SUCCESS;
    }

    if (err == MDBX_SUCCESS) {
      err = osal_removefile(dummy_env->pathname.lck);
      if (err == MDBX_SUCCESS)
        rc = MDBX_SUCCESS;
      else if (err == MDBX_ENOFILE)
        err = MDBX_SUCCESS;
    }

    /* Remove the directory only for the sub-directory layout. */
    if (err == MDBX_SUCCESS && !(dummy_env->flags & MDBX_NOSUBDIR) &&
        (/* pathname != "." */ pathname[0] != '.' || pathname[1] != 0) &&
        (/* pathname != ".." */ pathname[0] != '.' || pathname[1] != '.' || pathname[2] != 0)) {
      err = osal_removedirectory(pathname);
      if (err == MDBX_SUCCESS)
        rc = MDBX_SUCCESS;
      else if (err == MDBX_ENOFILE)
        err = MDBX_SUCCESS;
    }

    if (dxb_handle != INVALID_HANDLE_VALUE)
      osal_closefile(dxb_handle);
    if (clk_handle != INVALID_HANDLE_VALUE)
      osal_closefile(clk_handle);
  } else if (err == MDBX_ENOFILE)
    err = MDBX_SUCCESS;

  /* env_handle_pathname() zeroes the pointer first, so this is safe on every
   * path. */
  osal_free(dummy_env->pathname.buffer);
  return LOG_IFERR((err == MDBX_SUCCESS) ? rc : err);
}

/* Opens the environment at `pathname`.
 *
 * Checks, in order: check_env(); that `flags` has no bits outside
 * ENV_USABLE_FLAGS (MDBX_EINVAL); that the environment is not already opened
 * or active (MDBX_EPERM). The given flags are then merged with the ones
 * already kept in env->flags by combine_durability_flags(). For MDBX_RDONLY a
 * set of write-related flags is dropped and `mode` is forced to zero.
 *
 * After resolving the paths the per-DBI arrays are allocated and, unless the
 * environment is read-only, so is the basal write transaction together with
 * its trailing arrays. The real work is delegated to env_open().
 *
 * On any failure env_close() is called and env->flags is restored to the
 * value it had on entry; if env_close() itself fails, MDBX_PANIC is returned
 * and ENV_FATAL_ERROR is added to the flags.
 *
 * On Windows the narrow-character entry point converts `pathname`, forwards
 * to mdbx_env_openW() (the code after the #if) and on success calls
 * mdbx_env_get_path(). */
__cold int mdbx_env_open(MDBX_env *env, const char *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *pathnameW = nullptr;
  int rc = osal_mb2w(pathname, &pathnameW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_env_openW(env, pathnameW, flags, mode);
    osal_free(pathnameW);
    if (rc == MDBX_SUCCESS)
      /* force to make cache of the multi-byte pathname representation */
      mdbx_env_get_path(env, &pathname);
  }
  return LOG_IFERR(rc);
}

__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode) {
#endif /* Windows */

  int rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(flags & ~ENV_USABLE_FLAGS))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(env->lazy_fd != INVALID_HANDLE_VALUE || (env->flags & ENV_ACTIVE) != 0 || env->dxb_mmap.base))
    return LOG_IFERR(MDBX_EPERM);

  /* Pickup previously mdbx_env_set_flags(),
   * but avoid MDBX_UTTERLY_NOSYNC by disjunction */
  const uint32_t saved_me_flags = env->flags;
  flags = combine_durability_flags(flags | DEPRECATED_COALESCE, env->flags);

  if (flags & MDBX_RDONLY) {
    /* Silently ignore irrelevant flags when we're only getting read access */
    flags &= ~(MDBX_WRITEMAP | DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | DEPRECATED_COALESCE |
               MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE);
    mode = 0;
  } else {
#if MDBX_MMAP_INCOHERENT_FILE_WRITE
    /* Temporary `workaround` for OpenBSD kernel's flaw.
     * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */
    if ((flags & MDBX_WRITEMAP) == 0) {
      if (flags & MDBX_ACCEDE)
        flags |= MDBX_WRITEMAP;
      else {
        debug_log(MDBX_LOG_ERROR, __func__, __LINE__,
                  "System (i.e. OpenBSD) requires MDBX_WRITEMAP because "
                  "of an internal flaw(s) in a file/buffer/page cache.\n");
        return LOG_IFERR(42 /* ENOPROTOOPT */);
      }
    }
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  }

  env->flags = (flags & ~ENV_FATAL_ERROR);
  rc = env_handle_pathname(env, pathname, mode);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

  env->kvs = osal_calloc(env->max_dbi, sizeof(env->kvs[0]));
  env->dbs_flags = osal_calloc(env->max_dbi, sizeof(env->dbs_flags[0]));
  env->dbi_seqs = osal_calloc(env->max_dbi, sizeof(env->dbi_seqs[0]));
  if (unlikely(!(env->kvs && env->dbs_flags && env->dbi_seqs))) {
    rc = MDBX_ENOMEM;
    goto bailout;
  }

  if ((flags & MDBX_RDONLY) == 0) {
    /* Allocate the basal write transaction as a single block:
     *   [MDBX_txn + cursor_couple_t][dbs][cursors][dbi_seqs]...[dbi_sparse][dbi_state]
     * dbi_state occupies the tail of the block and the optional dbi_sparse
     * bitmap is placed right before it. */
    MDBX_txn *txn = nullptr;
    const intptr_t bitmap_bytes =
#if MDBX_ENABLE_DBI_SPARSE
        ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT;
#else
        0;
#endif /* MDBX_ENABLE_DBI_SPARSE */
    const size_t base = sizeof(MDBX_txn) + sizeof(cursor_couple_t);
    const size_t size = base + bitmap_bytes +
                        env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_seqs[0]) +
                                        sizeof(txn->dbi_state[0]));

    txn = osal_calloc(1, size);
    if (unlikely(!txn)) {
      rc = MDBX_ENOMEM;
      goto bailout;
    }
    txn->dbs = ptr_disp(txn, base);
    txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0]));
    txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0]));
    txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0]));
#if MDBX_ENABLE_DBI_SPARSE
    txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes);
#endif /* MDBX_ENABLE_DBI_SPARSE */
    txn->env = env;
    txn->flags = MDBX_TXN_FINISHED;
    /* Attach the transaction before the next allocations, so that the bailout
     * path sees it through env->basal_txn. */
    env->basal_txn = txn;
    txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL);
    txn->tw.repnl = pnl_alloc(MDBX_PNL_INITIAL);
    if (unlikely(!txn->tw.retired_pages || !txn->tw.repnl)) {
      rc = MDBX_ENOMEM;
      goto bailout;
    }
    env_options_adjust_defaults(env);
  }

  rc = env_open(env, mode);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

#if MDBX_DEBUG
  const troika_t troika = meta_tap(env);
  const meta_ptr_t head = meta_recent(env, &troika);
  const tree_t *db = &head.ptr_c->trees.main;

  DEBUG("opened database version %u, pagesize %u", (uint8_t)unaligned_peek_u64(4, head.ptr_c->magic_and_version),
        env->ps);
  DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, data_page(head.ptr_c)->pgno, head.txnid);
  DEBUG("depth: %u", db->height);
  DEBUG("entries: %" PRIu64, db->items);
  DEBUG("branch pages: %" PRIaPGNO, db->branch_pages);
  DEBUG("leaf pages: %" PRIaPGNO, db->leaf_pages);
  DEBUG("large/overflow pages: %" PRIaPGNO, db->large_pages);
  DEBUG("root: %" PRIaPGNO, db->root);
  DEBUG("schema_altered: %" PRIaTXN, db->mod_txnid);
#endif /* MDBX_DEBUG */

  if (likely(rc == MDBX_SUCCESS)) {
    dxb_sanitize_tail(env, nullptr);
  } else {
    /* The label sits inside the else-branch so that every `goto bailout`
     * above lands directly in the cleanup code. */
  bailout:
    if (likely(env_close(env, false) == MDBX_SUCCESS)) {
      env->flags = saved_me_flags;
    } else {
      rc = MDBX_PANIC;
      env->flags = saved_me_flags | ENV_FATAL_ERROR;
    }
  }
  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

#if !(defined(_WIN32) || defined(_WIN64))
/* Re-opens the environment when the current process id differs from the one
 * stored in env->pid, as happens in a child process after fork().
 *
 * Returns MDBX_EINVAL for a null handle, MDBX_EBADSIGN for a wrong signature,
 * MDBX_PANIC if ENV_FATAL_ERROR is set, and MDBX_SUCCESS without doing
 * anything if the environment is not active or the pid has not changed.
 *
 * Otherwise the signature is temporarily inverted with a compare-and-swap,
 * the basal transaction is aborted if a transaction is in progress, the
 * environment is closed via env_close(env, true) and then opened again with
 * env_open(env, 0). An environment with MDBX_EXCLUSIVE is not re-opened and
 * gives MDBX_BUSY. If re-opening fails and the following env_close() fails as
 * well, MDBX_PANIC is returned and ENV_FATAL_ERROR is set. */
__cold int mdbx_env_resurrect_after_fork(MDBX_env *env) {
  if (unlikely(!env))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(env->signature.weak != env_signature))
    return LOG_IFERR(MDBX_EBADSIGN);

  if (unlikely(env->flags & ENV_FATAL_ERROR))
    return LOG_IFERR(MDBX_PANIC);

  if (unlikely((env->flags & ENV_ACTIVE) == 0))
    return MDBX_SUCCESS;

  const uint32_t new_pid = osal_getpid();
  if (unlikely(env->pid == new_pid))
    return MDBX_SUCCESS;

  /* Invalidate the signature for the duration of the re-opening; it is
   * restored right after env_close() below. */
  if (!atomic_cas32(&env->signature, env_signature, ~env_signature))
    return LOG_IFERR(MDBX_EBADSIGN);

  if (env->txn)
    txn_abort(env->basal_txn);
  env->registered_reader_pid = 0;
  int rc = env_close(env, true);
  env->signature.weak = env_signature;
  if (likely(rc == MDBX_SUCCESS)) {
    rc = (env->flags & MDBX_EXCLUSIVE) ? MDBX_BUSY : env_open(env, 0);
    if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) {
      rc = MDBX_PANIC;
      env->flags |= ENV_FATAL_ERROR;
    }
  }
  return LOG_IFERR(rc);
}
#endif /* Windows */

/* Closes the environment and releases the handle.
 *
 * Early returns that leave the handle intact: MDBX_EINVAL (null handle),
 * MDBX_EBADSIGN (wrong signature, including losing the compare-and-swap that
 * zeroes it) and MDBX_BUSY (the basal transaction is owned by another
 * thread). Once the signature has been zeroed the handle is always destroyed
 * and freed, even if the returned code reports an error.
 *
 * Data is synced before closing unless `dont_sync` is set; syncing is also
 * skipped when the data file is not mapped, the environment is read-only or
 * in the fatal-error state, or there is no basal transaction. */
__cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
  page_t *dp;
  int rc = MDBX_SUCCESS;

  if (unlikely(!env))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(env->signature.weak != env_signature))
    return LOG_IFERR(MDBX_EBADSIGN);

#if MDBX_ENV_CHECKPID || !(defined(_WIN32) || defined(_WIN64))
  /* Check the PID even if MDBX_ENV_CHECKPID=0 on non-Windows
   * platforms (i.e. where fork() is available).
   * This is required to legitimize a call after fork()
   * from a child process, that should be allowed to free resources. */
  if (unlikely(env->pid != osal_getpid()))
    env->flags |= ENV_FATAL_ERROR;
#endif /* MDBX_ENV_CHECKPID */

  if (env->dxb_mmap.base && (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0 && env->basal_txn) {
    if (env->basal_txn->owner && env->basal_txn->owner != osal_thread_self())
      return LOG_IFERR(MDBX_BUSY);
  } else
    dont_sync = true;

  /* From here on the handle is no longer valid for other API calls. */
  if (!atomic_cas32(&env->signature, env_signature, 0))
    return LOG_IFERR(MDBX_EBADSIGN);

  if (!dont_sync) {
#if defined(_WIN32) || defined(_WIN64)
    /* On windows, without blocking is impossible to determine whether another
     * process is running a writing transaction or not.
     * Because in the "owner died" condition kernel don't release
     * file lock immediately. */
    rc = env_sync(env, true, false);
    rc = (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
#else
    struct stat st;
    if (unlikely(fstat(env->lazy_fd, &st)))
      rc = errno;
    else if (st.st_nlink > 0 /* don't sync deleted files */) {
      rc = env_sync(env, true, true);
      /* These results of the sync attempt are not reported as errors. */
      rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
            rc == MDBX_RESULT_TRUE)
               ? MDBX_SUCCESS
               : rc;
    }
#endif /* Windows */
  }

  if (env->basal_txn && env->basal_txn->owner == osal_thread_self())
    lck_txn_unlock(env);

  eASSERT(env, env->signature.weak == 0);
  rc = env_close(env, false) ? MDBX_PANIC : rc;
  ENSURE(env, osal_fastmutex_destroy(&env->dbi_lock) == MDBX_SUCCESS);
#if defined(_WIN32) || defined(_WIN64)
  /* remap_guard don't have destructor (Slim Reader/Writer Lock) */
  DeleteCriticalSection(&env->windowsbug_lock);
#else
  ENSURE(env, osal_fastmutex_destroy(&env->remap_guard) == MDBX_SUCCESS);
#endif /* Windows */

#if MDBX_LOCKING > MDBX_LOCKING_SYSV
  lck_t *const stub = lckless_stub(env);
  /* may return an error in a child process after fork() */
  lck_ipclock_destroy(&stub->wrt_lock);
#endif /* MDBX_LOCKING */

  /* Free the list of reserved shadow pages; per the ptr_disp() offset each
   * allocation starts sizeof(size_t) bytes before the page pointer. */
  while ((dp = env->shadow_reserve) != nullptr) {
    MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->ps);
    VALGRIND_MAKE_MEM_DEFINED(&page_next(dp), sizeof(page_t *));
    env->shadow_reserve = page_next(dp);
    void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t));
    osal_free(ptr);
  }
  VALGRIND_DESTROY_MEMPOOL(env);
  osal_free(env);

  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

/* Takes one snapshot of the environment information into `out`.
 *
 * `bytes` is the size of the caller's MDBX_envinfo; fields located at or
 * after mi_bootid, mi_pgop_stat and mi_dxbid are written only when `bytes`
 * is greater than the offset of the corresponding field, which allows shorter
 * versions of the structure.
 *
 * `troika` is written only when the data file is mapped.
 *
 * Returns MDBX_PANIC if ENV_FATAL_ERROR is set, otherwise MDBX_SUCCESS. For
 * an environment that is not opened yet only the configured geometry, reader
 * limit, page sizes and the current boot id are reported; the rest is
 * zeroed. */
static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, const size_t bytes,
                         troika_t *const troika) {
  const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
  const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
  const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
  if (unlikely(env->flags & ENV_FATAL_ERROR))
    return MDBX_PANIC;

  /* is the environment open?
   * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */
  if (unlikely(!env->dxb_mmap.base)) {
    /* environment not yet opened */
#if 1
    /* default behavior: returns the available info but zeroed the rest */
    memset(out, 0, bytes);
    out->mi_geo.lower = env->geo_in_bytes.lower;
    out->mi_geo.upper = env->geo_in_bytes.upper;
    out->mi_geo.shrink = env->geo_in_bytes.shrink;
    out->mi_geo.grow = env->geo_in_bytes.grow;
    out->mi_geo.current = env->geo_in_bytes.now;
    out->mi_maxreaders = env->max_readers;
    out->mi_dxb_pagesize = env->ps;
    out->mi_sys_pagesize = globals.sys_pagesize;
    if (likely(bytes > size_before_bootid)) {
      out->mi_bootid.current.x = globals.bootid.x;
      out->mi_bootid.current.y = globals.bootid.y;
    }
    return MDBX_SUCCESS;
#else
    /* some users may prefer this behavior: return appropriate error */
    return MDBX_EPERM;
#endif
  }

  /* A write transaction carries its own troika; otherwise take a fresh one. */
  *troika = (txn && !(txn->flags & MDBX_TXN_RDONLY)) ? txn->tw.troika : meta_tap(env);
  const meta_ptr_t head = meta_recent(env, troika);
  const meta_t *const meta0 = METAPAGE(env, 0);
  const meta_t *const meta1 = METAPAGE(env, 1);
  const meta_t *const meta2 = METAPAGE(env, 2);
  out->mi_recent_txnid = head.txnid;
  out->mi_meta_txnid[0] = troika->txnid[0];
  out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->sign);
  out->mi_meta_txnid[1] = troika->txnid[1];
  out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->sign);
  out->mi_meta_txnid[2] = troika->txnid[2];
  out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->sign);
  if (likely(bytes > size_before_bootid)) {
    memcpy(&out->mi_bootid.meta[0], &meta0->bootid, 16);
    memcpy(&out->mi_bootid.meta[1], &meta1->bootid, 16);
    memcpy(&out->mi_bootid.meta[2], &meta2->bootid, 16);
    if (likely(bytes > size_before_dxbid))
      memcpy(&out->mi_dxbid, &meta0->dxbid, 16);
  }

  /* By default the geometry comes from the most recent meta-page. With a
   * transaction, the current size and last page come from the transaction,
   * and the remaining limits from the meta-page whose txnid matches the one
   * the transaction is based on, if there is such a page. */
  const volatile meta_t *txn_meta = head.ptr_v;
  out->mi_last_pgno = txn_meta->geometry.first_unallocated - 1;
  out->mi_geo.current = pgno2bytes(env, txn_meta->geometry.now);
  if (txn) {
    out->mi_last_pgno = txn->geo.first_unallocated - 1;
    out->mi_geo.current = pgno2bytes(env, txn->geo.end_pgno);

    const txnid_t wanna_meta_txnid = (txn->flags & MDBX_TXN_RDONLY) ? txn->txnid : txn->txnid - xMDBX_TXNID_STEP;
    txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta;
    txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta;
    txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta;
  }
  out->mi_geo.lower = pgno2bytes(env, txn_meta->geometry.lower);
  out->mi_geo.upper = pgno2bytes(env, txn_meta->geometry.upper);
  out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->geometry.shrink_pv));
  out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->geometry.grow_pv));
  out->mi_mapsize = env->dxb_mmap.limit;

  const lck_t *const lck = env->lck;
  out->mi_maxreaders = env->max_readers;
  /* Without a mapped lock file the number of readers is reported as INT32_MAX. */
  out->mi_numreaders = env->lck_mmap.lck ? atomic_load32(&lck->rdt_length, mo_Relaxed) : INT32_MAX;
  out->mi_dxb_pagesize = env->ps;
  out->mi_sys_pagesize = globals.sys_pagesize;

  if (likely(bytes > size_before_bootid)) {
    /* One extra page is counted when the low 32 bits of the recent txnid
     * differ from meta_sync_txnid. */
    const uint64_t unsynced_pages =
        atomic_load64(&lck->unsynced_pages, mo_Relaxed) +
        ((uint32_t)out->mi_recent_txnid != atomic_load32(&lck->meta_sync_txnid, mo_Relaxed));
    out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages);
    const uint64_t monotime_now = osal_monotime();
    /* A zero timestamp is reported as a zero interval. */
    uint64_t ts = atomic_load64(&lck->eoos_timestamp, mo_Relaxed);
    out->mi_since_sync_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
    ts = atomic_load64(&lck->readers_check_timestamp, mo_Relaxed);
    out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
    out->mi_autosync_threshold = pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed));
    out->mi_autosync_period_seconds16dot16 =
        osal_monotime_to_16dot16_noUnderflow(atomic_load64(&lck->autosync_period, mo_Relaxed));
    out->mi_bootid.current.x = globals.bootid.x;
    out->mi_bootid.current.y = globals.bootid.y;
    out->mi_mode = env->lck_mmap.lck ? lck->envmode.weak : env->flags;
  }

  if (likely(bytes > size_before_pgop_stat)) {
#if MDBX_ENABLE_PGOP_STAT
    out->mi_pgop_stat.newly = atomic_load64(&lck->pgops.newly, mo_Relaxed);
    out->mi_pgop_stat.cow = atomic_load64(&lck->pgops.cow, mo_Relaxed);
    out->mi_pgop_stat.clone = atomic_load64(&lck->pgops.clone, mo_Relaxed);
    out->mi_pgop_stat.split = atomic_load64(&lck->pgops.split, mo_Relaxed);
    out->mi_pgop_stat.merge = atomic_load64(&lck->pgops.merge, mo_Relaxed);
    out->mi_pgop_stat.spill = atomic_load64(&lck->pgops.spill, mo_Relaxed);
    out->mi_pgop_stat.unspill = atomic_load64(&lck->pgops.unspill, mo_Relaxed);
    out->mi_pgop_stat.wops = atomic_load64(&lck->pgops.wops, mo_Relaxed);
    out->mi_pgop_stat.prefault = atomic_load64(&lck->pgops.prefault, mo_Relaxed);
    out->mi_pgop_stat.mincore = atomic_load64(&lck->pgops.mincore, mo_Relaxed);
    out->mi_pgop_stat.msync = atomic_load64(&lck->pgops.msync, mo_Relaxed);
    out->mi_pgop_stat.fsync = atomic_load64(&lck->pgops.fsync, mo_Relaxed);
#else
    memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat));
#endif /* MDBX_ENABLE_PGOP_STAT*/
  }

  /* Find the smallest txnid among the reader slots with a non-zero pid: over
   * all readers, and separately over the readers of this process. Both start
   * from the most recent txnid. */
  txnid_t overall_latter_reader_txnid = out->mi_recent_txnid;
  txnid_t self_latter_reader_txnid = overall_latter_reader_txnid;
  if (env->lck_mmap.lck) {
    for (size_t i = 0; i < out->mi_numreaders; ++i) {
      const uint32_t pid = atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease);
      if (pid) {
        const txnid_t txnid = safe64_read(&lck->rdt[i].txnid);
        if (overall_latter_reader_txnid > txnid)
          overall_latter_reader_txnid = txnid;
        if (pid == env->pid && self_latter_reader_txnid > txnid)
          self_latter_reader_txnid = txnid;
      }
    }
  }
  out->mi_self_latter_reader_txnid = self_latter_reader_txnid;
  out->mi_latter_reader_txnid = overall_latter_reader_txnid;

  osal_compiler_barrier();
  return MDBX_SUCCESS;
}

/* Fills `out` with environment information that was stable across two
 * consecutive snapshots.
 *
 * A full-size reference snapshot is taken first; then snapshots are taken
 * into `out` until the first `bytes` bytes match the previous snapshot. The
 * two "time since" fields always change, so they are copied into the
 * reference before comparing. Returns the first non-success code from
 * env_info_snap(), otherwise MDBX_SUCCESS. */
__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika) {
  MDBX_envinfo snap;
  int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  eASSERT(env, sizeof(snap) >= bytes);
  while (1) {
    rc = env_info_snap(env, txn, out, bytes, troika);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16;
    snap.mi_since_reader_check_seconds16dot16 = out->mi_since_reader_check_seconds16dot16;
    if (likely(memcmp(&snap, out, bytes) == 0))
      return MDBX_SUCCESS;
    memcpy(&snap, out, bytes);
  }
}

/* Public entry point for retrieving environment information.
 *
 * At least one of `env` and `txn` must be given, and `arg` must not be null.
 * `bytes` must be either sizeof(MDBX_envinfo) or the offset of one of the
 * mi_bootid, mi_pgop_stat or mi_dxbid fields. If both `env` and `txn` are
 * given, the transaction must belong to that environment. Any violation
 * gives MDBX_EINVAL; failures of check_txn() or check_env() are returned as
 * is. When only `txn` is given its environment is used. */
__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) {
  if (unlikely((env == nullptr && txn == nullptr) || arg == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
  const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
  const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
  if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat &&
      bytes != size_before_dxbid)
    return LOG_IFERR(MDBX_EINVAL);

  if (txn) {
    int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
  }
  if (env) {
    int err = check_env(env, false);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
    if (txn && unlikely(txn->env != env))
      return LOG_IFERR(MDBX_EINVAL);
  } else {
    env = txn->env;
  }

  troika_t troika;
  return LOG_IFERR(env_info(env, txn, arg, bytes, &troika));
}

/* Reads basic information about a database at `pathname` without opening an
 * environment handle.
 *
 * `out` must not be null and `bytes` must be either sizeof(MDBX_envinfo) or
 * the offset of one of the mi_bootid, mi_pgop_stat or mi_dxbid fields
 * (MDBX_EINVAL otherwise). `out` is zeroed first; then a temporary read-only
 * environment on the stack is used to resolve the paths, open the data file
 * and read its header with dxb_read_header(). Page size, geometry, last page
 * number, txnid, signature and (size permitting) boot id and dxbid of that
 * header are reported; entries of per-meta arrays are filled at index 0
 * only.
 *
 * Returns MDBX_INCOMPATIBLE for an unsuitable system page size, otherwise
 * MDBX_SUCCESS or the first error met. The temporary environment is released
 * with env_close() on every path after it has been set up.
 *
 * On Windows the narrow-character entry point converts `pathname` and
 * forwards to mdbx_preopen_snapinfoW(), whose body is the code after the
 * #if. */
__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, size_t bytes) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *pathnameW = nullptr;
  int rc = osal_mb2w(pathname, &pathnameW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_preopen_snapinfoW(pathnameW, out, bytes);
    osal_free(pathnameW);
  }
  return LOG_IFERR(rc);
}

__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, size_t bytes) {
#endif /* Windows */
  if (unlikely(!out))
    return LOG_IFERR(MDBX_EINVAL);

  const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
  const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
  const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
  if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat &&
      bytes != size_before_dxbid)
    return LOG_IFERR(MDBX_EINVAL);

  memset(out, 0, bytes);
  if (likely(bytes > size_before_bootid)) {
    out->mi_bootid.current.x = globals.bootid.x;
    out->mi_bootid.current.y = globals.bootid.y;
  }

  /* Set up a minimal temporary environment; nothing has been acquired yet,
   * so the early return below needs no cleanup. */
  MDBX_env env;
  memset(&env, 0, sizeof(env));
  env.pid = osal_getpid();
  if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) {
    ERROR("unsuitable system pagesize %u", globals.sys_pagesize);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
  out->mi_sys_pagesize = globals.sys_pagesize;
  env.flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION;
  env.stuck_meta = -1;
  env.lck_mmap.fd = INVALID_HANDLE_VALUE;
  env.lazy_fd = INVALID_HANDLE_VALUE;
  env.dsync_fd = INVALID_HANDLE_VALUE;
  env.fd4meta = INVALID_HANDLE_VALUE;
#if defined(_WIN32) || defined(_WIN64)
  env.dxb_lock_event = INVALID_HANDLE_VALUE;
  env.ioring.overlapped_fd = INVALID_HANDLE_VALUE;
#endif /* Windows */
  env_options_init(&env);

  int rc = env_handle_pathname(&env, pathname, 0);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;
  rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.pathname.dxb, &env.lazy_fd, 0);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

  meta_t header;
  rc = dxb_read_header(&env, &header, 0, 0);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

  /* The page size must be applied to env before pgno2bytes() is used. */
  out->mi_dxb_pagesize = env_setup_pagesize(&env, header.pagesize);
  out->mi_geo.lower = pgno2bytes(&env, header.geometry.lower);
  out->mi_geo.upper = pgno2bytes(&env, header.geometry.upper);
  out->mi_geo.shrink = pgno2bytes(&env, pv2pages(header.geometry.shrink_pv));
  out->mi_geo.grow = pgno2bytes(&env, pv2pages(header.geometry.grow_pv));
  out->mi_geo.current = pgno2bytes(&env, header.geometry.now);
  out->mi_last_pgno = header.geometry.first_unallocated - 1;

  const unsigned n = 0;
  out->mi_recent_txnid = constmeta_txnid(&header);
  out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.sign);
  if (likely(bytes > size_before_bootid)) {
    memcpy(&out->mi_bootid.meta[n], &header.bootid, 16);
    if (likely(bytes > size_before_dxbid))
      memcpy(&out->mi_dxbid, &header.dxbid, 16);
  }

bailout:
  env_close(&env, false);
  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

__cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, intptr_t size_upper,
                                 intptr_t growth_step, intptr_t shrink_threshold, intptr_t pagesize) {
  int rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  const bool txn0_owned = env->basal_txn && env_txn0_owned(env);
  const bool inside_txn = txn0_owned && env->txn;
  bool should_unlock = false;

#if MDBX_DEBUG
  if (growth_step < 0) {
    growth_step = 1;
    if (shrink_threshold < 0)
      shrink_threshold = 1;
  }
#endif /* MDBX_DEBUG */

  if (env->dxb_mmap.base) {
    /* env already mapped */
    if (unlikely(env->flags & MDBX_RDONLY))
      return LOG_IFERR(MDBX_EACCESS);

    if (!txn0_owned) {
      int err = lck_txn_lock(env, false);
      if (unlikely(err != MDBX_SUCCESS))
        return LOG_IFERR(err);
      should_unlock = true;
      env->basal_txn->tw.troika = meta_tap(env);
      eASSERT(env, !env->txn && !env->basal_txn->nested);
      env->basal_txn->txnid = env->basal_txn->tw.troika.txnid[env->basal_txn->tw.troika.recent];
      txn_snapshot_oldest(env->basal_txn);
    }

    /* get untouched params from current TXN or DB */
    if (pagesize <= 0 || pagesize >= INT_MAX)
      pagesize = env->ps;
    const geo_t *const geo = inside_txn ?
&env->txn->geo : &meta_recent(env, &env->basal_txn->tw.troika).ptr_c->geometry; if (size_lower < 0) size_lower = pgno2bytes(env, geo->lower); if (size_now < 0) size_now = pgno2bytes(env, geo->now); if (size_upper < 0) size_upper = pgno2bytes(env, geo->upper); if (growth_step < 0) growth_step = pgno2bytes(env, pv2pages(geo->grow_pv)); if (shrink_threshold < 0) shrink_threshold = pgno2bytes(env, pv2pages(geo->shrink_pv)); if (pagesize != (intptr_t)env->ps) { rc = MDBX_EINVAL; goto bailout; } const size_t usedbytes = pgno2bytes(env, mvcc_snapshot_largest(env, geo->first_unallocated)); if ((size_t)size_upper < usedbytes) { rc = MDBX_MAP_FULL; goto bailout; } if ((size_t)size_now < usedbytes) size_now = usedbytes; } else { /* env NOT yet mapped */ if (unlikely(inside_txn)) return LOG_IFERR(MDBX_PANIC); /* is requested some auto-value for pagesize ? */ if (pagesize >= INT_MAX /* maximal */) pagesize = MDBX_MAX_PAGESIZE; else if (pagesize <= 0) { if (pagesize < 0 /* default */) { pagesize = globals.sys_pagesize; if ((uintptr_t)pagesize > MDBX_MAX_PAGESIZE) pagesize = MDBX_MAX_PAGESIZE; eASSERT(env, (uintptr_t)pagesize >= MDBX_MIN_PAGESIZE); } else if (pagesize == 0 /* minimal */) pagesize = MDBX_MIN_PAGESIZE; /* choose pagesize */ intptr_t top = (size_now > size_lower) ? 
size_now : size_lower; if (size_upper > top) top = size_upper; if (top < 0 /* default */) top = reasonable_db_maxsize(); else if (top == 0 /* minimal */) top = MIN_MAPSIZE; else if (top >= (intptr_t)MAX_MAPSIZE /* maximal */) top = MAX_MAPSIZE; while (top > pagesize * (int64_t)(MAX_PAGENO + 1) && pagesize < MDBX_MAX_PAGESIZE) pagesize <<= 1; } } if (pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2(pagesize)) { rc = MDBX_EINVAL; goto bailout; } if (size_lower <= 0) { size_lower = MIN_MAPSIZE; if (MIN_MAPSIZE / pagesize < MIN_PAGENO) size_lower = MIN_PAGENO * pagesize; } if (size_lower >= INTPTR_MAX) { size_lower = reasonable_db_maxsize(); if ((size_t)size_lower / pagesize > MAX_PAGENO + 1) size_lower = pagesize * (MAX_PAGENO + 1); } if (size_now <= 0) { size_now = size_lower; if (size_upper >= size_lower && size_now > size_upper) size_now = size_upper; } if (size_now >= INTPTR_MAX) { size_now = reasonable_db_maxsize(); if ((size_t)size_now / pagesize > MAX_PAGENO + 1) size_now = pagesize * (MAX_PAGENO + 1); } if (size_upper <= 0) { if (growth_step == 0 || size_upper == 0) size_upper = size_now; else if (size_now >= reasonable_db_maxsize() / 2) size_upper = reasonable_db_maxsize(); else if ((size_t)size_now >= MAX_MAPSIZE32 / 2 && (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3) size_upper = MAX_MAPSIZE32; else { size_upper = ceil_powerof2(((size_t)size_now < MAX_MAPSIZE / 4) ? 
size_now + size_now : size_now + size_now / 2, MEGABYTE * MDBX_WORDBITS * MDBX_WORDBITS / 32); if ((size_t)size_upper > MAX_MAPSIZE) size_upper = MAX_MAPSIZE; } if ((size_t)size_upper / pagesize > (MAX_PAGENO + 1)) size_upper = pagesize * (MAX_PAGENO + 1); } else if (size_upper >= INTPTR_MAX) { size_upper = reasonable_db_maxsize(); if ((size_t)size_upper / pagesize > MAX_PAGENO + 1) size_upper = pagesize * (MAX_PAGENO + 1); } if (unlikely(size_lower < (intptr_t)MIN_MAPSIZE || size_lower > size_upper)) { rc = MDBX_EINVAL; goto bailout; } if ((uint64_t)size_lower / pagesize < MIN_PAGENO) { size_lower = pagesize * MIN_PAGENO; if (unlikely(size_lower > size_upper)) { rc = MDBX_EINVAL; goto bailout; } if (size_now < size_lower) size_now = size_lower; } if (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { rc = MDBX_TOO_LARGE; goto bailout; } const size_t unit = (globals.sys_pagesize > (size_t)pagesize) ? globals.sys_pagesize : (size_t)pagesize; size_lower = ceil_powerof2(size_lower, unit); size_upper = ceil_powerof2(size_upper, unit); size_now = ceil_powerof2(size_now, unit); /* LY: подбираем значение size_upper: * - кратное размеру страницы * - без нарушения MAX_MAPSIZE и MAX_PAGENO */ while (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) { if ((size_t)size_upper < unit + MIN_MAPSIZE || (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) { /* паранойа на случай переполнения при невероятных значениях */ rc = MDBX_EINVAL; goto bailout; } size_upper -= unit; if ((size_t)size_upper < (size_t)size_lower) size_lower = size_upper; } eASSERT(env, (size_upper - size_lower) % globals.sys_pagesize == 0); if (size_now < size_lower) size_now = size_lower; if (size_now > size_upper) size_now = size_upper; if (growth_step < 0) { growth_step = ((size_t)(size_upper - size_lower)) / 42; if (growth_step > size_lower && size_lower < (intptr_t)MEGABYTE) growth_step = size_lower; if 
(growth_step < 65536) growth_step = 65536; if ((size_t)growth_step > MAX_MAPSIZE / 64) growth_step = MAX_MAPSIZE / 64; } if (growth_step == 0 && shrink_threshold > 0) growth_step = 1; growth_step = ceil_powerof2(growth_step, unit); if (shrink_threshold < 0) shrink_threshold = growth_step + growth_step; shrink_threshold = ceil_powerof2(shrink_threshold, unit); //---------------------------------------------------------------------------- if (!env->dxb_mmap.base) { /* save user's geo-params for future open/create */ if (pagesize != (intptr_t)env->ps) env_setup_pagesize(env, pagesize); env->geo_in_bytes.lower = size_lower; env->geo_in_bytes.now = size_now; env->geo_in_bytes.upper = size_upper; env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, growth_step)))); env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, shrink_threshold)))); env_options_adjust_defaults(env); ENSURE(env, env->geo_in_bytes.lower >= MIN_MAPSIZE); ENSURE(env, env->geo_in_bytes.lower / (unsigned)pagesize >= MIN_PAGENO); ENSURE(env, env->geo_in_bytes.lower % (unsigned)pagesize == 0); ENSURE(env, env->geo_in_bytes.lower % globals.sys_pagesize == 0); ENSURE(env, env->geo_in_bytes.upper <= MAX_MAPSIZE); ENSURE(env, env->geo_in_bytes.upper / (unsigned)pagesize <= MAX_PAGENO + 1); ENSURE(env, env->geo_in_bytes.upper % (unsigned)pagesize == 0); ENSURE(env, env->geo_in_bytes.upper % globals.sys_pagesize == 0); ENSURE(env, env->geo_in_bytes.now >= env->geo_in_bytes.lower); ENSURE(env, env->geo_in_bytes.now <= env->geo_in_bytes.upper); ENSURE(env, env->geo_in_bytes.now % (unsigned)pagesize == 0); ENSURE(env, env->geo_in_bytes.now % globals.sys_pagesize == 0); ENSURE(env, env->geo_in_bytes.grow % (unsigned)pagesize == 0); ENSURE(env, env->geo_in_bytes.grow % globals.sys_pagesize == 0); ENSURE(env, env->geo_in_bytes.shrink % (unsigned)pagesize == 0); ENSURE(env, env->geo_in_bytes.shrink % globals.sys_pagesize == 0); rc = MDBX_SUCCESS; } else { /* apply new 
params to opened environment */ ENSURE(env, pagesize == (intptr_t)env->ps); meta_t meta; memset(&meta, 0, sizeof(meta)); if (!inside_txn) { eASSERT(env, should_unlock); const meta_ptr_t head = meta_recent(env, &env->basal_txn->tw.troika); uint64_t timestamp = 0; while ("workaround for " "https://libmdbx.dqdkfa.ru/dead-github/issues/269") { rc = coherency_fetch_head(env->basal_txn, head, ×tamp); if (likely(rc == MDBX_SUCCESS)) break; if (unlikely(rc != MDBX_RESULT_TRUE)) goto bailout; } meta = *head.ptr_c; const txnid_t txnid = safe64_txnid_next(head.txnid); if (unlikely(txnid > MAX_TXNID)) { rc = MDBX_TXN_FULL; ERROR("txnid overflow, raise %d", rc); goto bailout; } meta_set_txnid(env, &meta, txnid); } const geo_t *const current_geo = &(env->txn ? env->txn : env->basal_txn)->geo; /* update env-geo to avoid influences */ env->geo_in_bytes.now = pgno2bytes(env, current_geo->now); env->geo_in_bytes.lower = pgno2bytes(env, current_geo->lower); env->geo_in_bytes.upper = pgno2bytes(env, current_geo->upper); env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(current_geo->grow_pv)); env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(current_geo->shrink_pv)); geo_t new_geo; new_geo.lower = bytes2pgno(env, size_lower); new_geo.now = bytes2pgno(env, size_now); new_geo.upper = bytes2pgno(env, size_upper); new_geo.grow_pv = pages2pv(bytes2pgno(env, growth_step)); new_geo.shrink_pv = pages2pv(bytes2pgno(env, shrink_threshold)); new_geo.first_unallocated = current_geo->first_unallocated; ENSURE(env, pgno_align2os_bytes(env, new_geo.lower) == (size_t)size_lower); ENSURE(env, pgno_align2os_bytes(env, new_geo.upper) == (size_t)size_upper); ENSURE(env, pgno_align2os_bytes(env, new_geo.now) == (size_t)size_now); ENSURE(env, new_geo.grow_pv == pages2pv(pv2pages(new_geo.grow_pv))); ENSURE(env, new_geo.shrink_pv == pages2pv(pv2pages(new_geo.shrink_pv))); ENSURE(env, (size_t)size_lower >= MIN_MAPSIZE); ENSURE(env, new_geo.lower >= MIN_PAGENO); ENSURE(env, (size_t)size_upper <= 
MAX_MAPSIZE); ENSURE(env, new_geo.upper <= MAX_PAGENO + 1); ENSURE(env, new_geo.now >= new_geo.first_unallocated); ENSURE(env, new_geo.upper >= new_geo.now); ENSURE(env, new_geo.now >= new_geo.lower); if (memcmp(current_geo, &new_geo, sizeof(geo_t)) != 0) { #if defined(_WIN32) || defined(_WIN64) /* Was DB shrinking disabled before and now it will be enabled? */ if (new_geo.lower < new_geo.upper && new_geo.shrink_pv && !(current_geo->lower < current_geo->upper && current_geo->shrink_pv)) { if (!env->lck_mmap.lck) { rc = MDBX_EPERM; goto bailout; } int err = lck_rdt_lock(env); if (unlikely(MDBX_IS_ERROR(err))) { rc = err; goto bailout; } /* Check if there are any reading threads that do not use the SRWL */ const size_t CurrentTid = GetCurrentThreadId(); const reader_slot_t *const begin = env->lck_mmap.lck->rdt; const reader_slot_t *const end = begin + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease); for (const reader_slot_t *reader = begin; reader < end; ++reader) { if (reader->pid.weak == env->pid && reader->tid.weak != CurrentTid) { /* At least one thread may don't use SRWL */ rc = MDBX_EPERM; break; } } lck_rdt_unlock(env); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } #endif /* Windows */ if (new_geo.now != current_geo->now || new_geo.upper != current_geo->upper) { rc = dxb_resize(env, current_geo->first_unallocated, new_geo.now, new_geo.upper, explicit_resize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } if (inside_txn) { env->txn->geo = new_geo; env->txn->flags |= MDBX_TXN_DIRTY; } else { meta.geometry = new_geo; rc = dxb_sync_locked(env, env->flags, &meta, &env->basal_txn->tw.troika); if (likely(rc == MDBX_SUCCESS)) { env->geo_in_bytes.now = pgno2bytes(env, new_geo.now = meta.geometry.now); env->geo_in_bytes.upper = pgno2bytes(env, new_geo.upper = meta.geometry.upper); } } } if (likely(rc == MDBX_SUCCESS)) { /* update env-geo to avoid influences */ eASSERT(env, env->geo_in_bytes.now == pgno2bytes(env, new_geo.now)); 
env->geo_in_bytes.lower = pgno2bytes(env, new_geo.lower); eASSERT(env, env->geo_in_bytes.upper == pgno2bytes(env, new_geo.upper)); env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(new_geo.grow_pv)); env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(new_geo.shrink_pv)); } } bailout: if (should_unlock) lck_txn_unlock(env); return LOG_IFERR(rc); } __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { int rc = check_env(env, true); if (unlikely(rc != MDBX_SUCCESS)) return LOG_IFERR(rc); return LOG_IFERR(env_sync(env, force, nonblock)); } /*----------------------------------------------------------------------------*/ static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) { st->ms_depth += db->height; st->ms_branch_pages += db->branch_pages; st->ms_leaf_pages += db->leaf_pages; st->ms_overflow_pages += db->large_pages; st->ms_entries += db->items; if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid))) st->ms_mod_txnid = (st->ms_mod_txnid > db->mod_txnid) ? 
st->ms_mod_txnid : db->mod_txnid; } static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { memset(st, 0, bytes); int err = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(err != MDBX_SUCCESS)) return err; cursor_couple_t cx; err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); if (unlikely(err != MDBX_SUCCESS)) return err; const MDBX_env *const env = txn->env; st->ms_psize = env->ps; TXN_FOREACH_DBI_FROM(txn, dbi, /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) stat_add(txn->dbs + dbi, st, bytes); } if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) { /* scan and account not opened named tables */ err = tree_search(&cx.outer, nullptr, Z_FIRST); while (err == MDBX_SUCCESS) { const page_t *mp = cx.outer.pg[cx.outer.top]; for (size_t i = 0; i < page_numkeys(mp); i++) { const node_t *node = page_node(mp, i); if (node_flags(node) != N_TREE) continue; if (unlikely(node_ds(node) != sizeof(tree_t))) { ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node)); return MDBX_CORRUPTED; } /* skip opened and already accounted */ const MDBX_val name = {node_key(node), node_ks(node)}; TXN_FOREACH_DBI_USER(txn, dbi) { if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) { node = nullptr; break; } } if (node) { tree_t db; memcpy(&db, node_data(node), sizeof(db)); stat_add(&db, st, bytes); } } err = cursor_sibling_right(&cx.outer); } if (unlikely(err != MDBX_NOTFOUND)) return err; } return MDBX_SUCCESS; } __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) { if (unlikely(!dest)) return LOG_IFERR(MDBX_EINVAL); const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) 
return LOG_IFERR(MDBX_EINVAL); if (likely(txn)) { if (env && unlikely(txn->env != env)) return LOG_IFERR(MDBX_EINVAL); return LOG_IFERR(stat_acc(txn, dest, bytes)); } int err = check_env(env, true); if (unlikely(err != MDBX_SUCCESS)) return LOG_IFERR(err); if (env->txn && env_txn0_owned(env)) /* inside write-txn */ return LOG_IFERR(stat_acc(env->txn, dest, bytes)); MDBX_txn *tmp_txn; err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn); if (unlikely(err != MDBX_SUCCESS)) return LOG_IFERR(err); const int rc = stat_acc(tmp_txn, dest, bytes); err = mdbx_txn_abort(tmp_txn); if (unlikely(err != MDBX_SUCCESS)) return LOG_IFERR(err); return LOG_IFERR(rc); }