mirror of
https://github.com/isar/rusqlite.git
synced 2024-11-23 09:09:19 +08:00
Change arguments parsing of CSV module
This commit is contained in:
parent
5fa3810a4b
commit
10ba0514e6
@ -1,4 +1,5 @@
|
|||||||
//! CSV Virtual Table
|
//! CSV Virtual Table
|
||||||
|
//! Port of [csv](http://www.sqlite.org/cgi/src/finfo?name=ext/misc/csv.c) C extension.
|
||||||
extern crate csv;
|
extern crate csv;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::os::raw::{c_char, c_int, c_void};
|
use std::os::raw::{c_char, c_int, c_void};
|
||||||
@ -9,9 +10,19 @@ use std::str;
|
|||||||
use {Connection, Error, Result};
|
use {Connection, Error, Result};
|
||||||
use ffi;
|
use ffi;
|
||||||
use types::Null;
|
use types::Null;
|
||||||
use vtab::{declare_vtab, escape_double_quote, Context, IndexInfo, Values, VTab, VTabCursor};
|
use vtab::{declare_vtab, dequote, escape_double_quote, parse_boolean, Context, IndexInfo, Values, VTab, VTabCursor};
|
||||||
|
|
||||||
/// Register the "csv" module.
|
/// Register the "csv" module.
|
||||||
|
/// ```sql
|
||||||
|
/// CREATE VIRTUAL TABLE vtab USING csv(
|
||||||
|
/// filename=FILENAME -- Name of file containing CSV content
|
||||||
|
/// [, schema=SCHEMA] -- Alternative CSV schema. 'CREATE TABLE x(col1 TEXT NOT NULL, col2 INT, ...);'
|
||||||
|
/// [, header=YES|NO] -- First row of CSV defines the names of columns if "yes". Default "no".
|
||||||
|
/// [, columns=N] -- Assume the CSV file contains N columns.
|
||||||
|
/// [, delimiter=C] -- CSV delimiter. Default ','.
|
||||||
|
/// [, quote=C] -- CSV quote. Default '"'. 0 means no quote.
|
||||||
|
/// );
|
||||||
|
/// ```
|
||||||
pub fn load_module(conn: &Connection) -> Result<()> {
|
pub fn load_module(conn: &Connection) -> Result<()> {
|
||||||
let aux: Option<()> = None;
|
let aux: Option<()> = None;
|
||||||
conn.create_module("csv", &CSV_MODULE, aux)
|
conn.create_module("csv", &CSV_MODULE, aux)
|
||||||
@ -55,6 +66,27 @@ impl CSVTab {
|
|||||||
.quote(self.quote)
|
.quote(self.quote)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parameter(c_slice: &[u8]) -> Result<(&str, &str)> {
|
||||||
|
let arg = try!(str::from_utf8(c_slice)).trim();
|
||||||
|
let mut split = arg.split('=');
|
||||||
|
if let Some(key) = split.next() {
|
||||||
|
if let Some(value) = split.next() {
|
||||||
|
let param = key.trim();
|
||||||
|
let value = dequote(value);
|
||||||
|
return Ok((param, value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(Error::ModuleError(format!("illegal argument: '{}'", arg)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_byte(arg: &str) -> Option<u8> {
|
||||||
|
if arg.len() == 1 {
|
||||||
|
arg.bytes().next()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VTab<CSVTabCursor> for CSVTab {
|
impl VTab<CSVTabCursor> for CSVTab {
|
||||||
@ -62,63 +94,105 @@ impl VTab<CSVTabCursor> for CSVTab {
|
|||||||
if args.len() < 4 {
|
if args.len() < 4 {
|
||||||
return Err(Error::ModuleError("no CSV file specified".to_owned()));
|
return Err(Error::ModuleError("no CSV file specified".to_owned()));
|
||||||
}
|
}
|
||||||
// pull out name of csv file (remove quotes)
|
|
||||||
let mut c_filename = args[3];
|
|
||||||
if c_filename[0] == b'\'' {
|
|
||||||
c_filename = &c_filename[1..c_filename.len() - 1];
|
|
||||||
}
|
|
||||||
let filename = try!(str::from_utf8(c_filename));
|
|
||||||
if !Path::new(filename).exists() {
|
|
||||||
return Err(Error::ModuleError(format!("file '{}' does not exist", filename)));
|
|
||||||
}
|
|
||||||
let mut vtab = CSVTab {
|
let mut vtab = CSVTab {
|
||||||
base: Default::default(),
|
base: Default::default(),
|
||||||
filename: String::from(filename),
|
filename: "".to_owned(),
|
||||||
has_headers: false,
|
has_headers: false,
|
||||||
delimiter: b',',
|
delimiter: b',',
|
||||||
quote: b'"',
|
quote: b'"',
|
||||||
offset_first_row: 0,
|
offset_first_row: 0,
|
||||||
};
|
};
|
||||||
let mut cols: Vec<String> = Vec::new();
|
let mut schema = None;
|
||||||
|
let mut n_col = None;
|
||||||
|
|
||||||
let args = &args[4..];
|
let args = &args[3..];
|
||||||
for c_slice in args {
|
for c_slice in args {
|
||||||
if c_slice.len() == 1 {
|
let (param, value) = try!(CSVTab::parameter(c_slice));
|
||||||
vtab.delimiter = c_slice[0];
|
match param {
|
||||||
} else if c_slice.len() == 3 && c_slice[0] == b'\'' {
|
"filename" => {
|
||||||
vtab.delimiter = c_slice[1];
|
if !Path::new(value).exists() {
|
||||||
|
return Err(Error::ModuleError(format!("file '{}' does not exist", value)));
|
||||||
|
}
|
||||||
|
vtab.filename = value.to_owned();
|
||||||
|
},
|
||||||
|
"schema" => {
|
||||||
|
schema = Some(value.to_owned());
|
||||||
|
},
|
||||||
|
"columns" => {
|
||||||
|
if let Ok(n) = value.parse::<u16>() {
|
||||||
|
if n_col.is_some() {
|
||||||
|
return Err(Error::ModuleError("more than one 'columns' parameter".to_owned()));
|
||||||
|
} else if n == 0 {
|
||||||
|
return Err(Error::ModuleError("must have at least one column".to_owned()));
|
||||||
|
}
|
||||||
|
n_col = Some(n);
|
||||||
} else {
|
} else {
|
||||||
let arg = try!(str::from_utf8(c_slice));
|
return Err(Error::ModuleError(format!("unrecognized argument to 'columns': {}", value)));
|
||||||
let uc = arg.to_uppercase();
|
}
|
||||||
if uc.contains("HEADER") {
|
},
|
||||||
vtab.has_headers = true;
|
"header" => {
|
||||||
} else if uc.contains("NO_QUOTE") {
|
if let Some(b) = parse_boolean(value) {
|
||||||
|
vtab.has_headers = b;
|
||||||
|
} else {
|
||||||
|
return Err(Error::ModuleError(format!("unrecognized argument to 'header': {}", value)));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"delimiter" => {
|
||||||
|
if let Some(b) = CSVTab::parse_byte(value) {
|
||||||
|
vtab.delimiter = b;
|
||||||
|
} else {
|
||||||
|
return Err(Error::ModuleError(format!("unrecognized argument to 'delimiter': {}", value)));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"quote" => {
|
||||||
|
if let Some(b) = CSVTab::parse_byte(value) {
|
||||||
|
if b == b'0' {
|
||||||
vtab.quote = 0;
|
vtab.quote = 0;
|
||||||
} else {
|
} else {
|
||||||
cols.push(escape_double_quote(arg).into_owned());
|
vtab.quote = b;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
return Err(Error::ModuleError(format!("unrecognized argument to 'quote': {}", value)));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Err(Error::ModuleError(format!("unrecognized parameter '{}'", param)));
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if vtab.has_headers {
|
if vtab.filename.is_empty() {
|
||||||
|
return Err(Error::ModuleError("no CSV file specified".to_owned()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut cols: Vec<String> = Vec::new();
|
||||||
|
if vtab.has_headers || (n_col.is_none() && schema.is_none()) {
|
||||||
let mut reader = try!(vtab.reader());
|
let mut reader = try!(vtab.reader());
|
||||||
|
if vtab.has_headers {
|
||||||
let headers = try!(reader.headers());
|
let headers = try!(reader.headers());
|
||||||
vtab.offset_first_row = reader.byte_offset();
|
vtab.offset_first_row = reader.byte_offset();
|
||||||
// headers ignored if cols is not empty
|
// headers ignored if cols is not empty
|
||||||
if cols.is_empty() {
|
if n_col.is_none() && schema.is_none() {
|
||||||
cols = headers;
|
cols = headers.into_iter().map(|header| escape_double_quote(&header).into_owned()).collect();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let mut count = 0;
|
||||||
|
while let Some(col) = reader.next_bytes().into_iter_result() {
|
||||||
|
try!(col);
|
||||||
|
cols.push(format!("c{}", count));
|
||||||
|
count+=1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cols.is_empty() {
|
if cols.is_empty() && schema.is_none() {
|
||||||
return Err(Error::ModuleError("no column name specified".to_owned()));
|
return Err(Error::ModuleError("no column specified".to_owned()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if schema.is_none() {
|
||||||
let mut sql = String::from("CREATE TABLE x(");
|
let mut sql = String::from("CREATE TABLE x(");
|
||||||
for (i, col) in cols.iter().enumerate() {
|
for (i, col) in cols.iter().enumerate() {
|
||||||
if col.is_empty() {
|
|
||||||
return Err(Error::ModuleError("no column name found".to_owned()));
|
|
||||||
}
|
|
||||||
sql.push('"');
|
sql.push('"');
|
||||||
sql.push_str(col);
|
sql.push_str(col);
|
||||||
sql.push_str("\" TEXT");
|
sql.push_str("\" TEXT");
|
||||||
@ -128,12 +202,16 @@ impl VTab<CSVTabCursor> for CSVTab {
|
|||||||
sql.push_str(", ");
|
sql.push_str(", ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
schema = Some(sql);
|
||||||
|
}
|
||||||
|
|
||||||
try!(declare_vtab(db, &sql));
|
try!(declare_vtab(db, &schema.unwrap()));
|
||||||
Ok(vtab)
|
Ok(vtab)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn best_index(&self, _info: &mut IndexInfo) -> Result<()> {
|
// Only a forward full table scan is supported.
|
||||||
|
fn best_index(&self, info: &mut IndexInfo) -> Result<()> {
|
||||||
|
info.set_estimated_cost(1000000.);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,8 +227,9 @@ struct CSVTabCursor {
|
|||||||
base: ffi::sqlite3_vtab_cursor,
|
base: ffi::sqlite3_vtab_cursor,
|
||||||
/// The CSV reader object
|
/// The CSV reader object
|
||||||
reader: csv::Reader<File>,
|
reader: csv::Reader<File>,
|
||||||
/// Current cursor position
|
/// Current cursor position used as rowid
|
||||||
row_number: usize,
|
row_number: usize,
|
||||||
|
/// Values of the current row
|
||||||
cols: Vec<String>,
|
cols: Vec<String>,
|
||||||
eof: bool,
|
eof: bool,
|
||||||
}
|
}
|
||||||
@ -172,6 +251,8 @@ impl VTabCursor<CSVTab> for CSVTabCursor {
|
|||||||
unsafe { & *(self.base.pVtab as *const CSVTab) }
|
unsafe { & *(self.base.pVtab as *const CSVTab) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only a full table scan is supported. So `filter` simply rewinds to
|
||||||
|
// the beginning.
|
||||||
fn filter(&mut self,
|
fn filter(&mut self,
|
||||||
_idx_num: c_int,
|
_idx_num: c_int,
|
||||||
_idx_str: Option<&str>,
|
_idx_str: Option<&str>,
|
||||||
@ -236,7 +317,7 @@ mod test {
|
|||||||
fn test_csv_module() {
|
fn test_csv_module() {
|
||||||
let db = Connection::open_in_memory().unwrap();
|
let db = Connection::open_in_memory().unwrap();
|
||||||
csvtab::load_module(&db).unwrap();
|
csvtab::load_module(&db).unwrap();
|
||||||
db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv('test.csv', HAS_HEADERS)").unwrap();
|
db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)").unwrap();
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut s = db.prepare("SELECT rowid, * FROM vtab").unwrap();
|
let mut s = db.prepare("SELECT rowid, * FROM vtab").unwrap();
|
||||||
@ -257,7 +338,7 @@ mod test {
|
|||||||
fn test_csv_cursor() {
|
fn test_csv_cursor() {
|
||||||
let db = Connection::open_in_memory().unwrap();
|
let db = Connection::open_in_memory().unwrap();
|
||||||
csvtab::load_module(&db).unwrap();
|
csvtab::load_module(&db).unwrap();
|
||||||
db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv('test.csv', HAS_HEADERS)").unwrap();
|
db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)").unwrap();
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut s =
|
let mut s =
|
||||||
|
@ -331,6 +331,35 @@ pub fn escape_double_quote(identifier: &str) -> Cow<str> {
|
|||||||
Borrowed(identifier)
|
Borrowed(identifier)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Dequote string
|
||||||
|
pub fn dequote(s: &str) -> &str {
|
||||||
|
if s.len() < 2 {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
match s.bytes().next() {
|
||||||
|
Some(b) if b == b'"' || b == b'\'' => {
|
||||||
|
match s.bytes().rev().next() {
|
||||||
|
Some(e) if e == b => &s[1..s.len()-1],
|
||||||
|
_ => s,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => s,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// The boolean can be one of:
|
||||||
|
/// ```text
|
||||||
|
/// 1 yes true on
|
||||||
|
/// 0 no false off
|
||||||
|
/// ```
|
||||||
|
pub fn parse_boolean(s: &str) -> Option<bool> {
|
||||||
|
if s.eq_ignore_ascii_case("yes") || s.eq_ignore_ascii_case("on") || s.eq_ignore_ascii_case("true") || s.eq("1") {
|
||||||
|
Some(true)
|
||||||
|
} else if s.eq_ignore_ascii_case("no") || s.eq_ignore_ascii_case("off") || s.eq_ignore_ascii_case("false") || s.eq("0") {
|
||||||
|
Some(false)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME copy/paste from function.rs
|
// FIXME copy/paste from function.rs
|
||||||
unsafe extern "C" fn free_boxed_value<T>(p: *mut c_void) {
|
unsafe extern "C" fn free_boxed_value<T>(p: *mut c_void) {
|
||||||
@ -665,3 +694,31 @@ pub mod int_array;
|
|||||||
pub mod csvtab;
|
pub mod csvtab;
|
||||||
#[cfg(feature = "bundled")]
|
#[cfg(feature = "bundled")]
|
||||||
pub mod series;
|
pub mod series;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
#[test]
|
||||||
|
fn test_dequote() {
|
||||||
|
assert_eq!("", super::dequote(""));
|
||||||
|
assert_eq!("'", super::dequote("'"));
|
||||||
|
assert_eq!("\"", super::dequote("\""));
|
||||||
|
assert_eq!("'\"", super::dequote("'\""));
|
||||||
|
assert_eq!("", super::dequote("''"));
|
||||||
|
assert_eq!("", super::dequote("\"\""));
|
||||||
|
assert_eq!("x", super::dequote("'x'"));
|
||||||
|
assert_eq!("x", super::dequote("\"x\""));
|
||||||
|
assert_eq!("x", super::dequote("x"));
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_parse_boolean() {
|
||||||
|
assert_eq!(None, super::parse_boolean(""));
|
||||||
|
assert_eq!(Some(true), super::parse_boolean("1"));
|
||||||
|
assert_eq!(Some(true), super::parse_boolean("yes"));
|
||||||
|
assert_eq!(Some(true), super::parse_boolean("on"));
|
||||||
|
assert_eq!(Some(true), super::parse_boolean("true"));
|
||||||
|
assert_eq!(Some(false), super::parse_boolean("0"));
|
||||||
|
assert_eq!(Some(false), super::parse_boolean("no"));
|
||||||
|
assert_eq!(Some(false), super::parse_boolean("off"));
|
||||||
|
assert_eq!(Some(false), super::parse_boolean("false"));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user