From 7cd909fc14f054b719a049596d327ddad6e4bd45 Mon Sep 17 00:00:00 2001
From: Thom Chiovoloni
Date: Mon, 8 Jun 2020 22:57:59 -0700
Subject: [PATCH] Add positional blob i/o and adjust blob i/o example docs

---
 src/{blob.rs => blob/mod.rs} | 226 +++++++++++++++++++++++++++++------
 src/blob/pos_io.rs           | 175 +++++++++++++++++++++
 src/error.rs                 |  14 +++
 3 files changed, 377 insertions(+), 38 deletions(-)
 rename src/{blob.rs => blob/mod.rs} (58%)
 create mode 100644 src/blob/pos_io.rs

diff --git a/src/blob.rs b/src/blob/mod.rs
similarity index 58%
rename from src/blob.rs
rename to src/blob/mod.rs
index f4ca951..7d7ec3d 100644
--- a/src/blob.rs
+++ b/src/blob/mod.rs
@@ -3,54 +3,188 @@
 //! Note that SQLite does not provide API-level access to change the size of a
 //! BLOB; that must be performed through SQL statements.
 //!
+//! There are two choices for how to perform IO on a [`Blob`].
+//!
+//! 1. The implementations it provides of the `std::io::Read`, `std::io::Write`,
+//!    and `std::io::Seek` traits.
+//!
+//! 2. A positional IO API, e.g. [`Blob::read_at`], [`Blob::write_at`], and
+//!    similar.
+//!
+//! Documenting these in order:
+//!
+//! ## 1. `std::io` trait implementations
+//!
 //! `Blob` conforms to `std::io::Read`, `std::io::Write`, and `std::io::Seek`,
 //! so it plays nicely with other types that build on these (such as
-//! `std::io::BufReader` and `std::io::BufWriter`). However, you must be
-//! careful with the size of the blob. For example, when using a `BufWriter`,
-//! the `BufWriter` will accept more data than the `Blob`
-//! will allow, so make sure to call `flush` and check for errors. (See the
-//! unit tests in this module for an example.)
+//! `std::io::BufReader` and `std::io::BufWriter`). However, you must be careful
+//! with the size of the blob. For example, when using a `BufWriter`, the
+//! `BufWriter` will accept more data than the `Blob` will allow, so make sure
+//! to call `flush` and check for errors. (See the unit tests in this module for
+//! an example.)
 //!
-//! ## Example
+//! ## 2. Positional IO
+//!
+//! `Blob`s also offer a `pread` / `pwrite`-style positional IO API in the form
+//! of [`Blob::read_at`], [`Blob::write_at`], [`Blob::raw_read_at`],
+//! [`Blob::read_at_exact`], and [`Blob::raw_read_at_exact`].
+//!
+//! These APIs all take the position to read from or write to as a parameter,
+//! instead of using an internal `pos` value.
+//!
+//! ### Positional IO Read Variants
+//!
+//! Several `read` functions are provided:
+//!
+//! - [`Blob::read_at`]
+//! - [`Blob::raw_read_at`]
+//! - [`Blob::read_at_exact`]
+//! - [`Blob::raw_read_at_exact`]
+//!
+//! These can be divided along two axes: raw/not raw, and exact/inexact:
+//!
+//! 1. Raw/not raw refers to the type of the destination buffer. The raw
+//!    functions take a `&mut [MaybeUninit<u8>]` as the destination buffer,
+//!    where the "normal" functions take a `&mut [u8]`.
+//!
+//!    Using `MaybeUninit<u8>` here can be more efficient in some cases, but is
+//!    often inconvenient, so both are provided.
+//!
+//! 2. Exact/inexact refers to whether or not the entire buffer must be
+//!    filled in order for the call to be considered a success.
+//!
+//!    The "exact" functions require the provided buffer be entirely filled, or
+//!    they return an error, whereas the "inexact" functions read as much out of
+//!    the blob as is available, and return how much they were able to read.
+//!
+//! The inexact functions are preferable if you do not know the size of the
+//! blob already, and the exact functions are preferable if you do.
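+//!
+//! As a minimal sketch of that difference (the hidden setup mirrors the
+//! examples below, with a 10-byte blob opened read-only):
+//!
+//! ```rust
+//! # use rusqlite::{Connection, DatabaseName, NO_PARAMS};
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! # let db = Connection::open_in_memory()?;
+//! # db.execute_batch("CREATE TABLE test_table (content BLOB);")?;
+//! # db.execute("INSERT INTO test_table (content) VALUES (ZEROBLOB(10))", NO_PARAMS)?;
+//! # let rowid = db.last_insert_rowid();
+//! # let blob = db.blob_open(DatabaseName::Main, "test_table", "content", rowid, true)?;
+//! let mut buf = [0u8; 100];
+//! // Inexact: reads the 10 bytes that are available and reports the count.
+//! assert_eq!(blob.read_at(&mut buf, 0)?, 10);
+//! // Exact: a 100-byte buffer cannot be filled from a 10-byte blob, so this
+//! // returns `Err(Error::BlobSizeError)`.
+//! assert!(blob.read_at_exact(&mut buf, 0).is_err());
+//! # Ok(())
+//! # }
+//! ```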
+//!
+//! ### Comparison to using the `std::io` traits
+//!
+//! In general, the positional methods offer the following pros and cons
+//! compared to the implementations of `std::io::{Read, Write, Seek}` we
+//! provide for `Blob`:
+//!
+//! 1. (Pro) There is no need to first seek to a position in order to perform
+//!    IO on it, as the position is a parameter.
+//!
+//! 2. (Pro) `Blob`'s positional read functions don't mutate the blob in any
+//!    way, and take `&self`. No `&mut` access required.
+//!
+//! 3. (Pro) Positional IO functions return `Err(rusqlite::Error)` on failure,
+//!    rather than `Err(std::io::Error)`. Returning `rusqlite::Error` is more
+//!    accurate and convenient.
+//!
+//!    Note that no data is lost with the `std::io` API, however; it can be
+//!    recovered with `io_err.downcast::<Error>()` (although this is easy to
+//!    forget).
+//!
+//! 4. (Pro, for now) A `raw` version of the read API exists, which allows
+//!    reading into a `&mut [MaybeUninit<u8>]` buffer and thus avoids a
+//!    potentially costly initialization step. (However, the `std::io` traits
+//!    will certainly gain this someday, which is why this is only a "Pro, for
+//!    now".)
+//!
+//! 5. (Con) The set of functions is more bare-bones than what is offered in
+//!    `std::io`, which has a number of adapters, handy algorithms, and further
+//!    traits.
+//!
+//! 6. (Con) No meaningful interoperability with other crates, so if you need
+//!    that you must use `std::io`.
+//!
+//! To generalize: the `std::io` traits are useful because they conform to a
+//! standard interface that a lot of code knows how to handle; however, that
+//! interface is not a perfect fit for [`Blob`], so a small set of additional
+//! functions is provided as well.
+//!
+//! # Example (`std::io`)
 //!
 //! ```rust
-//! use rusqlite::blob::ZeroBlob;
-//! use rusqlite::{Connection, DatabaseName, NO_PARAMS};
-//! use std::error::Error;
-//! use std::io::{Read, Seek, SeekFrom, Write};
+//! # use rusqlite::blob::ZeroBlob;
+//! # use rusqlite::{Connection, DatabaseName, NO_PARAMS};
+//! # use std::error::Error;
+//! # use std::io::{Read, Seek, SeekFrom, Write};
+//! # fn main() -> Result<(), Box<dyn Error>> {
+//! let db = Connection::open_in_memory()?;
+//! db.execute_batch("CREATE TABLE test_table (content BLOB);")?;
 //!
-//! fn main() -> Result<(), Box<dyn Error>> {
-//!     let db = Connection::open_in_memory()?;
-//!     db.execute_batch("CREATE TABLE test (content BLOB);")?;
-//!     db.execute(
-//!         "INSERT INTO test (content) VALUES (ZEROBLOB(10))",
-//!         NO_PARAMS,
-//!     )?;
+//! // Insert a BLOB into the `content` column of `test_table`. Note that the Blob
+//! // I/O API provides no way of inserting or resizing BLOBs in the DB -- this
+//! // must be done via SQL.
+//! db.execute(
+//!     "INSERT INTO test_table (content) VALUES (ZEROBLOB(10))",
+//!     NO_PARAMS,
+//! )?;
 //!
-//!     let rowid = db.last_insert_rowid();
-//!     let mut blob = db.blob_open(DatabaseName::Main, "test", "content", rowid, false)?;
+//! // Get the row id of the BLOB we just inserted.
+//! let rowid = db.last_insert_rowid();
+//! // Open the BLOB we just inserted for IO.
+//! let mut blob = db.blob_open(DatabaseName::Main, "test_table", "content", rowid, false)?;
 //!
-//!     // Make sure to test that the number of bytes written matches what you expect;
-//!     // if you try to write too much, the data will be truncated to the size of the
-//!     // BLOB.
-//!     let bytes_written = blob.write(b"01234567")?;
-//!     assert_eq!(bytes_written, 8);
+//! // Write some data into the blob. Make sure to test that the number of bytes
+//! // written matches what you expect; if you try to write too much, the data
+//! // will be truncated to the size of the BLOB.
+//! let bytes_written = blob.write(b"01234567")?;
+//! assert_eq!(bytes_written, 8);
 //!
-//!     // Same guidance - make sure you check the number of bytes read!
-//!     blob.seek(SeekFrom::Start(0))?;
-//!     let mut buf = [0u8; 20];
-//!     let bytes_read = blob.read(&mut buf[..])?;
-//!     assert_eq!(bytes_read, 10); // note we read 10 bytes because the blob has size 10
+//! // Move back to the start and read into a local buffer.
+//! // Same guidance - make sure you check the number of bytes read!
+//! blob.seek(SeekFrom::Start(0))?;
+//! let mut buf = [0u8; 20];
+//! let bytes_read = blob.read(&mut buf[..])?;
+//! assert_eq!(bytes_read, 10); // note we read 10 bytes because the blob has size 10
 //!
-//!     db.execute("INSERT INTO test (content) VALUES (?)", &[ZeroBlob(64)])?;
+//! // Insert another BLOB, this time using a parameter passed in from
+//! // Rust (potentially with a dynamic size).
+//! db.execute("INSERT INTO test_table (content) VALUES (?)", &[ZeroBlob(64)])?;
 //!
-//!     // given a new row ID, we can reopen the blob on that row
-//!     let rowid = db.last_insert_rowid();
-//!     blob.reopen(rowid)?;
+//! // Given a new row ID, we can reopen the blob on that row.
+//! let rowid = db.last_insert_rowid();
+//! blob.reopen(rowid)?;
+//! // Just check that the size is right.
+//! assert_eq!(blob.len(), 64);
+//! # Ok(())
+//! # }
+//! ```
 //!
-//!     assert_eq!(blob.size(), 64);
-//!     Ok(())
-//! }
+//! # Example (Positional)
+//!
+//! ```rust
+//! # use rusqlite::blob::ZeroBlob;
+//! # use rusqlite::{Connection, DatabaseName, NO_PARAMS};
+//! # use std::error::Error;
+//! # fn main() -> Result<(), Box<dyn Error>> {
+//! let db = Connection::open_in_memory()?;
+//! db.execute_batch("CREATE TABLE test_table (content BLOB);")?;
+//! // Insert a blob into the `content` column of `test_table`. Note that the Blob
+//! // I/O API provides no way of inserting or resizing blobs in the DB -- this
+//! // must be done via SQL.
+//! db.execute(
+//!     "INSERT INTO test_table (content) VALUES (ZEROBLOB(10))",
+//!     NO_PARAMS,
+//! )?;
+//! // Get the row id of the blob we just inserted.
+//! let rowid = db.last_insert_rowid();
+//! // Open the blob we just inserted for IO.
+//! let mut blob = db.blob_open(DatabaseName::Main, "test_table", "content", rowid, false)?;
+//! // Write some data into the blob.
+//! blob.write_at(b"ABCDEF", 2)?;
+//!
+//! // Read the whole blob into a local buffer.
+//! let mut buf = [0u8; 10];
+//! blob.read_at_exact(&mut buf, 0)?;
+//! assert_eq!(&buf, b"\0\0ABCDEF\0\0");
+//!
+//! // Insert another blob, this time using a parameter passed in from
+//! // Rust (potentially with a dynamic size).
+//! db.execute("INSERT INTO test_table (content) VALUES (?)", &[ZeroBlob(64)])?;
+//!
+//! // Given a new row ID, we can reopen the blob on that row.
+//! let rowid = db.last_insert_rowid();
+//! blob.reopen(rowid)?;
+//! assert_eq!(blob.len(), 64);
+//! # Ok(())
+//! # }
 //! ```
 use std::cmp::min;
 use std::io;
@@ -60,10 +194,14 @@
 use super::ffi;
 use super::types::{ToSql, ToSqlOutput};
 use crate::{Connection, DatabaseName, Result};
 
-/// `feature = "blob"` Handle to an open BLOB.
+mod pos_io;
+
+/// `feature = "blob"` Handle to an open BLOB. See [`rusqlite::blob`](crate::blob) documentation for
+/// in-depth discussion.
 pub struct Blob<'conn> {
     conn: &'conn Connection,
     blob: *mut ffi::sqlite3_blob,
+    // used by std::io implementations.
     pos: i32,
 }
 
@@ -128,6 +266,17 @@ impl Blob<'_> {
         unsafe { ffi::sqlite3_blob_bytes(self.blob) }
     }
 
+    /// Return the current size in bytes of the BLOB.
+    pub fn len(&self) -> usize {
+        use std::convert::TryInto;
+        self.size().try_into().unwrap()
+    }
+
+    /// Return true if the BLOB is empty.
+    pub fn is_empty(&self) -> bool {
+        self.size() == 0
+    }
+
     /// Close a BLOB handle.
     ///
     /// Calling `close` explicitly is not required (the BLOB will be closed
@@ -161,7 +310,8 @@ impl io::Read for Blob<'_> {
         if n <= 0 {
             return Ok(0);
         }
-        let rc = unsafe { ffi::sqlite3_blob_read(self.blob, buf.as_ptr() as *mut _, n, self.pos) };
+        let rc =
+            unsafe { ffi::sqlite3_blob_read(self.blob, buf.as_mut_ptr() as *mut _, n, self.pos) };
         self.conn
             .decode_result(rc)
             .map(|_| {
diff --git a/src/blob/pos_io.rs b/src/blob/pos_io.rs
new file mode 100644
index 0000000..a9dac2d
--- /dev/null
+++ b/src/blob/pos_io.rs
@@ -0,0 +1,175 @@
+use super::Blob;
+
+use std::convert::TryFrom;
+use std::mem::MaybeUninit;
+use std::slice::from_raw_parts_mut;
+
+use crate::ffi;
+use crate::{Error, Result};
+
+impl<'conn> Blob<'conn> {
+    /// Write `buf` to `self` starting at `write_start`, returning an error if
+    /// `write_start + buf.len()` is past the end of the blob.
+    ///
+    /// If an error is returned, no data is written.
+    ///
+    /// Note: the blob cannot be resized using this function -- that must be
+    /// done using SQL (for example, an `UPDATE` statement).
+    ///
+    /// Note: This is part of the positional I/O API, and thus takes an absolute
+    /// position to write to, instead of using the internal position that can be
+    /// manipulated by the `std::io` traits.
+    pub fn write_at(&mut self, buf: &[u8], write_start: usize) -> Result<()> {
+        let len = self.len();
+
+        if buf.len().saturating_add(write_start) > len {
+            return Err(Error::BlobSizeError);
+        }
+        // We know `len` fits in an `i32`, so either:
+        //
+        // 1. `buf.len() + write_start` overflows, in which case we'd hit the
+        //    return above (courtesy of `saturating_add`).
+        //
+        // 2. `buf.len() + write_start` doesn't overflow but is larger than len,
+        //    in which case ditto.
+        //
+        // 3. `buf.len() + write_start` doesn't overflow and is no larger than
+        //    len. This means that both `buf.len()` and `write_start` can also
+        //    be losslessly converted to i32, since `len` came from an i32.
+        // Sanity check the above.
+        debug_assert!(i32::try_from(write_start).is_ok() && i32::try_from(buf.len()).is_ok());
+        unsafe {
+            check!(ffi::sqlite3_blob_write(
+                self.blob,
+                buf.as_ptr() as *const _,
+                buf.len() as i32,
+                write_start as i32,
+            ));
+        }
+        Ok(())
+    }
+
+    /// Read as much as possible from `read_start` to `read_start + buf.len()`
+    /// out of `self`, writing into `buf`. On success, returns the number of
+    /// bytes read into `buf`.
+    ///
+    /// If there's insufficient data in `self`, then the returned value will be
+    /// less than `buf.len()`.
+    ///
+    /// See also [`Blob::raw_read_at`], which can take an uninitialized buffer,
+    /// or [`Blob::read_at_exact`], which returns an error if the entire `buf`
+    /// is not read.
+    ///
+    /// Note: This is part of the positional I/O API, and thus takes an absolute
+    /// position to read from, instead of using the internal position that can
+    /// be manipulated by the `std::io` traits. Consequently, it does not change
+    /// that value either.
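+    ///
+    /// A minimal sketch of a short (inexact) read; it assumes `blob` is open
+    /// on a 10-byte row, as in the module-level examples:
+    ///
+    /// ```rust,ignore
+    /// let mut buf = [0u8; 100];
+    /// // Only 7 bytes are available from position 3, so `n` is 7, not 100.
+    /// let n = blob.read_at(&mut buf, 3)?;
+    /// assert_eq!(n, 7);
+    /// ```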
+    pub fn read_at(&self, buf: &mut [u8], read_start: usize) -> Result<usize> {
+        // Safety: this is safe because `raw_read_at` never stores uninitialized
+        // data into `as_uninit`.
+        let as_uninit: &mut [MaybeUninit<u8>] =
+            unsafe { from_raw_parts_mut(buf.as_mut_ptr() as *mut _, buf.len()) };
+        self.raw_read_at(as_uninit, read_start).map(|s| s.len())
+    }
+
+    /// Read as much as possible from `read_start` to `read_start + buf.len()`
+    /// out of `self`, writing into `buf`. On success, returns the portion of
+    /// `buf` which was initialized by this call.
+    ///
+    /// If there's insufficient data in `self`, then the returned value will be
+    /// shorter than `buf`.
+    ///
+    /// See also [`Blob::read_at`], which takes a `&mut [u8]` buffer instead of
+    /// a slice of `MaybeUninit<u8>`.
+    ///
+    /// Note: This is part of the positional I/O API, and thus takes an absolute
+    /// position to read from, instead of using the internal position that can
+    /// be manipulated by the `std::io` traits. Consequently, it does not change
+    /// that value either.
+    pub fn raw_read_at<'a>(
+        &self,
+        buf: &'a mut [MaybeUninit<u8>],
+        read_start: usize,
+    ) -> Result<&'a mut [u8]> {
+        let len = self.len();
+
+        let read_len = match len.checked_sub(read_start) {
+            None | Some(0) => 0,
+            Some(v) => v.min(buf.len()),
+        };
+
+        if read_len == 0 {
+            // We could return `Ok(&mut [])`, but it seems confusing that the
+            // pointers don't match, so fabricate an empty slice of u8 with the
+            // same base pointer as `buf`.
+            let empty = unsafe { from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, 0) };
+            return Ok(empty);
+        }
+
+        // At this point we believe `read_start as i32` is lossless because:
+        //
+        // 1. `len as i32` is known to be lossless, since it comes from a SQLite
+        //    API returning an i32.
+        //
+        // 2. If we got here, `len.checked_sub(read_start)` was Some (or else
+        //    we'd have hit the `if read_len == 0` early return), so `len` must
+        //    be larger than `read_start`, and so it must fit in i32 as well.
+        debug_assert!(i32::try_from(read_start).is_ok());
+
+        // We also believe that `read_start + read_len <= len` because:
+        //
+        // 1. This is equivalent to `read_len <= len - read_start` via algebra.
+        // 2. We know that `read_len` is `min(len - read_start, buf.len())`.
+        // 3. Expanding, this is `min(len - read_start, buf.len()) <= len - read_start`,
+        //    or `min(A, B) <= A`, which is clearly true.
+        //
+        // Note that this stuff is in debug_assert so no need to use checked_add
+        // and such -- we'll always panic on overflow in debug builds.
+        debug_assert!(read_start + read_len <= len);
+
+        // These follow naturally.
+        debug_assert!(read_len <= buf.len() && i32::try_from(read_len).is_ok());
+
+        unsafe {
+            check!(ffi::sqlite3_blob_read(
+                self.blob,
+                buf.as_mut_ptr() as *mut _,
+                read_len as i32,
+                read_start as i32,
+            ));
+
+            Ok(from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, read_len))
+        }
+    }
+
+    /// Equivalent to [`Blob::read_at`], but returns a `BlobSizeError` if `buf`
+    /// cannot be filled completely.
+    pub fn read_at_exact(&self, buf: &mut [u8], read_start: usize) -> Result<()> {
+        let n = self.read_at(buf, read_start)?;
+        if n != buf.len() {
+            Err(Error::BlobSizeError)
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Equivalent to [`Blob::raw_read_at`], but returns a `BlobSizeError` if
+    /// `buf` is not fully initialized.
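+    ///
+    /// A minimal sketch of an exact read into an uninitialized buffer,
+    /// assuming `blob` is open on a 10-byte row as in the module-level
+    /// examples:
+    ///
+    /// ```rust,ignore
+    /// use std::mem::MaybeUninit;
+    /// let mut buf = [MaybeUninit::<u8>::uninit(); 10];
+    /// // On success we get back the initialized contents of `buf` as `&mut [u8]`.
+    /// let data: &mut [u8] = blob.raw_read_at_exact(&mut buf, 0)?;
+    /// assert_eq!(data.len(), 10);
+    /// ```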
+    pub fn raw_read_at_exact<'a>(
+        &self,
+        buf: &'a mut [MaybeUninit<u8>],
+        read_start: usize,
+    ) -> Result<&'a mut [u8]> {
+        let buflen = buf.len();
+        let initted = self.raw_read_at(buf, read_start)?;
+        if initted.len() != buflen {
+            Err(Error::BlobSizeError)
+        } else {
+            Ok(initted)
+        }
+    }
+}
diff --git a/src/error.rs b/src/error.rs
index c05f8cc..98583cb 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -109,6 +109,12 @@ pub enum Error {
     /// parameters in the query. The first `usize` is how many parameters were
     /// given, the 2nd is how many were expected.
     InvalidParameterCount(usize, usize),
+
+    /// Returned from various functions in the Blob IO positional API. For
+    /// example, [`Blob::raw_read_at_exact`](crate::blob::Blob::raw_read_at_exact)
+    /// will return it if the blob has insufficient data.
+    #[cfg(feature = "blob")]
+    BlobSizeError,
 }
 
 impl PartialEq for Error {
@@ -151,6 +157,8 @@ impl PartialEq for Error {
             (Error::InvalidParameterCount(i1, n1), Error::InvalidParameterCount(i2, n2)) => {
                 i1 == i2 && n1 == n2
             }
+            #[cfg(feature = "blob")]
+            (Error::BlobSizeError, Error::BlobSizeError) => true,
             (..) => false,
         }
     }
@@ -262,6 +270,9 @@ impl fmt::Display for Error {
             #[cfg(feature = "functions")]
             Error::GetAuxWrongType => write!(f, "get_aux called with wrong type"),
             Error::MultipleStatement => write!(f, "Multiple statements provided"),
+
+            #[cfg(feature = "blob")]
+            Error::BlobSizeError => "Blob size is insufficient".fmt(f),
         }
     }
 }
@@ -306,6 +317,9 @@ impl error::Error for Error {
 
             #[cfg(feature = "functions")]
             Error::GetAuxWrongType => None,
+
+            #[cfg(feature = "blob")]
+            Error::BlobSizeError => None,
         }
     }
 }