2016-02-09 01:06:11 +08:00
|
|
|
//! CSV Virtual Table
|
|
|
|
extern crate csv;
|
|
|
|
use std::fs::File;
|
2017-03-09 03:35:07 +08:00
|
|
|
use std::os::raw::{c_char, c_int, c_void};
|
2016-02-13 03:17:42 +08:00
|
|
|
use std::path::Path;
|
|
|
|
use std::result;
|
2016-02-11 03:30:08 +08:00
|
|
|
use std::str;
|
2016-02-09 01:06:11 +08:00
|
|
|
|
|
|
|
use {Connection, Error, Result};
|
|
|
|
use ffi;
|
2016-02-11 01:07:58 +08:00
|
|
|
use types::Null;
|
2016-08-14 15:31:53 +08:00
|
|
|
use vtab::{declare_vtab, escape_double_quote, Context, IndexInfo, Values, VTab, VTabCursor};
|
2016-02-09 01:06:11 +08:00
|
|
|
|
2016-02-12 04:47:47 +08:00
|
|
|
/// Register the "csv" module.
|
2016-02-09 01:06:11 +08:00
|
|
|
pub fn load_module(conn: &Connection) -> Result<()> {
|
|
|
|
let aux: Option<()> = None;
|
|
|
|
conn.create_module("csv", &CSV_MODULE, aux)
|
|
|
|
}
|
|
|
|
|
2016-05-04 03:00:59 +08:00
|
|
|
// Static definition of the "csv" module, expanded by `init_module!` from the
// table type, the cursor type and the callback names listed below.
// NOTE(review): the callback order looks like it follows the method slots of
// SQLite's `sqlite3_module` (create, connect, best_index, disconnect,
// destroy, open, ...) — confirm against the macro definition.
// `csv_connect` and `csv_disconnect` are each passed twice, once wrapped in
// `Some`.
init_module!(
    CSV_MODULE,
    CSVTab,
    CSVTabCursor,
    Some(csv_connect),
    csv_connect,
    csv_best_index,
    csv_disconnect,
    Some(csv_disconnect),
    csv_open,
    csv_close,
    csv_filter,
    csv_next,
    csv_eof,
    csv_column,
    csv_rowid
);
|
2016-02-09 01:06:11 +08:00
|
|
|
|
2016-08-13 17:54:19 +08:00
|
|
|
/// An instance of the CSV virtual table
|
2016-02-09 01:06:11 +08:00
|
|
|
#[repr(C)]
|
|
|
|
struct CSVTab {
|
2016-08-13 17:54:19 +08:00
|
|
|
/// Base class. Must be first
|
2016-02-09 01:06:11 +08:00
|
|
|
base: ffi::sqlite3_vtab,
|
2016-08-13 17:54:19 +08:00
|
|
|
/// Name of the CSV file
|
2016-02-13 03:17:42 +08:00
|
|
|
filename: String,
|
|
|
|
has_headers: bool,
|
|
|
|
delimiter: u8,
|
|
|
|
quote: u8,
|
2016-08-13 17:54:19 +08:00
|
|
|
/// Offset to start of data
|
2016-02-09 01:06:11 +08:00
|
|
|
offset_first_row: u64,
|
2016-02-13 03:17:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
impl CSVTab {
|
|
|
|
fn reader(&self) -> result::Result<csv::Reader<File>, csv::Error> {
|
|
|
|
csv::Reader::from_file(&self.filename).map(|reader| {
|
|
|
|
reader.has_headers(self.has_headers)
|
2016-05-21 00:36:44 +08:00
|
|
|
.delimiter(self.delimiter)
|
|
|
|
.quote(self.quote)
|
2016-02-13 03:17:42 +08:00
|
|
|
})
|
|
|
|
}
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
|
|
|
|
2016-02-11 01:15:46 +08:00
|
|
|
impl VTab<CSVTabCursor> for CSVTab {
|
2017-03-09 03:35:07 +08:00
|
|
|
fn connect(db: *mut ffi::sqlite3, _aux: *mut c_void, args: &[&[u8]]) -> Result<CSVTab> {
|
2016-02-11 03:30:08 +08:00
|
|
|
if args.len() < 4 {
|
2016-04-02 23:16:17 +08:00
|
|
|
return Err(Error::ModuleError("no CSV file specified".to_owned()));
|
2016-02-11 01:07:58 +08:00
|
|
|
}
|
2016-02-11 03:30:08 +08:00
|
|
|
// pull out name of csv file (remove quotes)
|
2016-08-13 17:54:19 +08:00
|
|
|
let mut c_filename = args[3];
|
2016-02-11 03:30:08 +08:00
|
|
|
if c_filename[0] == b'\'' {
|
|
|
|
c_filename = &c_filename[1..c_filename.len() - 1];
|
|
|
|
}
|
|
|
|
let filename = try!(str::from_utf8(c_filename));
|
2016-02-13 03:17:42 +08:00
|
|
|
if !Path::new(filename).exists() {
|
|
|
|
return Err(Error::ModuleError(format!("file '{}' does not exist", filename)));
|
|
|
|
}
|
|
|
|
let mut vtab = CSVTab {
|
|
|
|
base: Default::default(),
|
|
|
|
filename: String::from(filename),
|
|
|
|
has_headers: false,
|
|
|
|
delimiter: b',',
|
|
|
|
quote: b'"',
|
|
|
|
offset_first_row: 0,
|
|
|
|
};
|
2016-02-12 04:19:18 +08:00
|
|
|
let mut cols: Vec<String> = Vec::new();
|
2016-02-11 03:30:08 +08:00
|
|
|
|
|
|
|
let args = &args[4..];
|
2016-08-13 17:54:19 +08:00
|
|
|
for c_slice in args {
|
2016-02-11 03:30:08 +08:00
|
|
|
if c_slice.len() == 1 {
|
2016-02-13 03:17:42 +08:00
|
|
|
vtab.delimiter = c_slice[0];
|
2016-02-11 03:30:08 +08:00
|
|
|
} else if c_slice.len() == 3 && c_slice[0] == b'\'' {
|
2016-02-13 03:17:42 +08:00
|
|
|
vtab.delimiter = c_slice[1];
|
2016-02-11 03:30:08 +08:00
|
|
|
} else {
|
|
|
|
let arg = try!(str::from_utf8(c_slice));
|
|
|
|
let uc = arg.to_uppercase();
|
|
|
|
if uc.contains("HEADER") {
|
2016-02-13 03:17:42 +08:00
|
|
|
vtab.has_headers = true;
|
2016-02-11 03:30:08 +08:00
|
|
|
} else if uc.contains("NO_QUOTE") {
|
2016-02-13 03:17:42 +08:00
|
|
|
vtab.quote = 0;
|
2016-02-11 03:30:08 +08:00
|
|
|
} else {
|
2016-02-12 04:19:18 +08:00
|
|
|
cols.push(escape_double_quote(arg).into_owned());
|
2016-02-11 03:30:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-13 03:17:42 +08:00
|
|
|
if vtab.has_headers {
|
|
|
|
let mut reader = try!(vtab.reader());
|
2016-02-11 03:30:08 +08:00
|
|
|
let headers = try!(reader.headers());
|
2016-02-13 03:17:42 +08:00
|
|
|
vtab.offset_first_row = reader.byte_offset();
|
2016-02-11 03:30:08 +08:00
|
|
|
// headers ignored if cols is not empty
|
|
|
|
if cols.is_empty() {
|
|
|
|
cols = headers;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if cols.is_empty() {
|
2016-04-02 23:16:17 +08:00
|
|
|
return Err(Error::ModuleError("no column name specified".to_owned()));
|
2016-02-11 03:30:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut sql = String::from("CREATE TABLE x(");
|
|
|
|
for (i, col) in cols.iter().enumerate() {
|
|
|
|
if col.is_empty() {
|
2016-04-02 23:16:17 +08:00
|
|
|
return Err(Error::ModuleError("no column name found".to_owned()));
|
2016-02-11 03:30:08 +08:00
|
|
|
}
|
|
|
|
sql.push('"');
|
|
|
|
sql.push_str(col);
|
2016-08-13 17:54:19 +08:00
|
|
|
sql.push_str("\" TEXT");
|
2016-02-11 03:30:08 +08:00
|
|
|
if i == cols.len() - 1 {
|
|
|
|
sql.push_str(");");
|
|
|
|
} else {
|
|
|
|
sql.push_str(", ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
try!(declare_vtab(db, &sql));
|
2016-02-09 01:06:11 +08:00
|
|
|
Ok(vtab)
|
|
|
|
}
|
|
|
|
|
2016-08-14 15:44:37 +08:00
|
|
|
fn best_index(&self, _info: &mut IndexInfo) -> Result<()> {
|
|
|
|
Ok(())
|
|
|
|
}
|
2016-02-09 01:06:11 +08:00
|
|
|
|
|
|
|
fn open(&self) -> Result<CSVTabCursor> {
|
2016-02-13 03:17:42 +08:00
|
|
|
Ok(CSVTabCursor::new(try!(self.reader())))
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-13 17:54:19 +08:00
|
|
|
/// A cursor for the CSV virtual table
|
2016-02-09 01:06:11 +08:00
|
|
|
#[repr(C)]
|
|
|
|
struct CSVTabCursor {
|
2016-08-13 17:54:19 +08:00
|
|
|
/// Base class. Must be first
|
2016-02-09 01:06:11 +08:00
|
|
|
base: ffi::sqlite3_vtab_cursor,
|
2016-08-13 17:54:19 +08:00
|
|
|
/// The CSV reader object
|
2016-02-13 03:17:42 +08:00
|
|
|
reader: csv::Reader<File>,
|
2016-02-09 01:06:11 +08:00
|
|
|
/// Current cursor position
|
|
|
|
row_number: usize,
|
2016-02-13 03:17:42 +08:00
|
|
|
cols: Vec<String>,
|
|
|
|
eof: bool,
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
impl CSVTabCursor {
|
2016-02-13 03:17:42 +08:00
|
|
|
fn new(reader: csv::Reader<File>) -> CSVTabCursor {
|
2016-02-09 01:06:11 +08:00
|
|
|
CSVTabCursor {
|
|
|
|
base: Default::default(),
|
2016-02-13 03:17:42 +08:00
|
|
|
reader: reader,
|
2016-02-09 01:06:11 +08:00
|
|
|
row_number: 0,
|
2016-02-13 03:17:42 +08:00
|
|
|
cols: Vec::new(),
|
|
|
|
eof: false,
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
|
|
|
}
|
2016-02-11 01:15:46 +08:00
|
|
|
}
|
2016-02-09 01:06:11 +08:00
|
|
|
|
2016-02-11 01:15:46 +08:00
|
|
|
impl VTabCursor<CSVTab> for CSVTabCursor {
|
2016-02-09 01:06:11 +08:00
|
|
|
fn vtab(&self) -> &mut CSVTab {
|
|
|
|
unsafe { &mut *(self.base.pVtab as *mut CSVTab) }
|
|
|
|
}
|
|
|
|
|
2016-02-12 02:16:05 +08:00
|
|
|
fn filter(&mut self,
|
2017-03-09 03:35:07 +08:00
|
|
|
_idx_num: c_int,
|
2016-08-13 19:55:30 +08:00
|
|
|
_idx_str: Option<&str>,
|
2016-08-14 15:31:53 +08:00
|
|
|
_args: &Values)
|
2016-02-12 02:16:05 +08:00
|
|
|
-> Result<()> {
|
2016-02-09 01:06:11 +08:00
|
|
|
{
|
2016-02-13 03:17:42 +08:00
|
|
|
let offset_first_row = self.vtab().offset_first_row;
|
|
|
|
try!(self.reader.seek(offset_first_row));
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
|
|
|
self.row_number = 0;
|
|
|
|
self.next()
|
|
|
|
}
|
|
|
|
fn next(&mut self) -> Result<()> {
|
2016-02-11 03:48:30 +08:00
|
|
|
{
|
2016-02-13 03:17:42 +08:00
|
|
|
self.eof = self.reader.done();
|
|
|
|
if self.eof {
|
2016-02-12 02:16:05 +08:00
|
|
|
return Ok(());
|
2016-02-11 03:48:30 +08:00
|
|
|
}
|
|
|
|
|
2016-02-13 03:17:42 +08:00
|
|
|
self.cols.clear();
|
|
|
|
while let Some(col) = self.reader.next_str().into_iter_result() {
|
|
|
|
self.cols.push(String::from(try!(col)));
|
2016-02-11 03:48:30 +08:00
|
|
|
}
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
2016-02-11 03:48:30 +08:00
|
|
|
|
2016-08-13 19:55:30 +08:00
|
|
|
self.row_number += 1;
|
2016-02-09 01:06:11 +08:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
fn eof(&self) -> bool {
|
2016-02-13 03:17:42 +08:00
|
|
|
self.eof
|
2016-02-09 01:06:11 +08:00
|
|
|
}
|
2017-03-09 03:35:07 +08:00
|
|
|
fn column(&self, ctx: &mut Context, col: c_int) -> Result<()> {
|
2016-02-13 03:17:42 +08:00
|
|
|
if col < 0 || col as usize >= self.cols.len() {
|
2016-02-11 01:07:58 +08:00
|
|
|
return Err(Error::ModuleError(format!("column index out of bounds: {}", col)));
|
|
|
|
}
|
2016-02-13 03:17:42 +08:00
|
|
|
if self.cols.is_empty() {
|
2016-08-13 23:46:49 +08:00
|
|
|
ctx.set_result(&Null);
|
2016-02-11 01:07:58 +08:00
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
// TODO Affinity
|
2016-08-13 23:46:49 +08:00
|
|
|
ctx.set_result(&self.cols[col as usize]);
|
2016-02-09 01:06:11 +08:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
fn rowid(&self) -> Result<i64> {
|
|
|
|
Ok(self.row_number as i64)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<csv::Error> for Error {
|
|
|
|
fn from(err: csv::Error) -> Error {
|
|
|
|
use std::error::Error as StdError;
|
|
|
|
Error::ModuleError(String::from(err.description()))
|
|
|
|
}
|
|
|
|
}
|
2016-02-12 02:16:05 +08:00
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use {Connection, Result};
    use vtab::csvtab;

    /// Column names come from the header row of `test.csv` and the rowids of
    /// all rows add up to 15.
    #[test]
    fn test_csv_module() {
        let db = Connection::open_in_memory().unwrap();
        csvtab::load_module(&db).unwrap();
        db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv('test.csv', HAS_HEADERS)").unwrap();

        {
            let mut stmt = db.prepare("SELECT rowid, * FROM vtab").unwrap();
            {
                // Scoped so the borrowed names are released before querying.
                let names = stmt.column_names();
                assert_eq!(vec!["rowid", "colA", "colB", "colC"], names);
            }

            let rowids: Result<Vec<i32>> =
                stmt.query_map(&[], |row| row.get::<i32, i32>(0)).unwrap().collect();
            let total = rowids.unwrap().into_iter().fold(0, |acc, id| acc + id);
            assert_eq!(total, 15);
        }
        db.execute_batch("DROP TABLE vtab").unwrap();
    }

    /// Two cursors can scan the same table at once: the first row of the
    /// self-join has rowid 2.
    #[test]
    fn test_csv_cursor() {
        let db = Connection::open_in_memory().unwrap();
        csvtab::load_module(&db).unwrap();
        db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv('test.csv', HAS_HEADERS)").unwrap();

        {
            let mut stmt = db
                .prepare("SELECT v1.rowid, v1.* FROM vtab v1 NATURAL JOIN vtab v2 WHERE \
                          v1.rowid < v2.rowid")
                .unwrap();

            let mut rows = stmt.query(&[]).unwrap();
            let first = rows.next().unwrap().unwrap();
            assert_eq!(first.get::<i32, i32>(0), 2);
        }
        db.execute_batch("DROP TABLE vtab").unwrap();
    }
}
|