diff --git a/crates/pica-cli/src/commands/convert/download.rs b/crates/pica-cli/src/commands/convert/download.rs
new file mode 100644
index 000000000..6bd7c5f0e
--- /dev/null
+++ b/crates/pica-cli/src/commands/convert/download.rs
@@ -0,0 +1,113 @@
+use std::fs::File;
+use std::io::{self, BufRead, BufReader};
+use std::path::Path;
+
+use bstr::ByteSlice;
+use pica_record::io::{ReadPicaError, RecordsIter};
+use pica_record::{ByteRecord, StringRecord};
+
+/// Reader for record files in the "download" text layout.
+///
+/// As parsed here, each record is a line starting with `SET:`, an
+/// empty line, the field lines, and a closing empty line.
+pub(crate) struct DownloadReader {
+    inner: BufReader<File>,
+    buf: Vec<u8>,
+}
+
+impl DownloadReader {
+    /// Opens the file at `path` for buffered reading.
+    pub(crate) fn from_path<P: AsRef<Path>>(
+        path: P,
+    ) -> io::Result<Self> {
+        let reader = File::open(path)?;
+
+        Ok(Self {
+            inner: BufReader::new(reader),
+            buf: Vec::new(),
+        })
+    }
+}
+
+impl RecordsIter for DownloadReader {
+    type ByteItem<'a>
+        = Result<ByteRecord<'a>, ReadPicaError>
+    where
+        Self: 'a;
+
+    type StringItem<'a>
+        = Result<StringRecord<'a>, ReadPicaError>
+    where
+        Self: 'a;
+
+    /// Returns the next record, or `None` at the end of the file.
+    fn next_byte_record(&mut self) -> Option<Self::ByteItem<'_>> {
+        self.buf.clear();
+
+        match self.inner.read_until(b'\n', &mut self.buf) {
+            Err(e) => return Some(Err(ReadPicaError::from(e))),
+            Ok(0) => return None,
+            Ok(_) => {
+                if !self.buf.starts_with(b"SET:") {
+                    return Some(Err(ReadPicaError::Other(
+                        "expected line starting with phrase 'SET:'"
+                            .into(),
+                    )));
+                }
+
+                // A line of exactly two bytes counts as empty.
+                match self.inner.read_until(b'\n', &mut self.buf) {
+                    Err(e) => return Some(Err(ReadPicaError::from(e))),
+                    Ok(n) => {
+                        if n != 2 {
+                            return Some(Err(ReadPicaError::Other(
+                                "expected empty line".into(),
+                            )));
+                        }
+                    }
+                }
+
+                self.buf.clear();
+            }
+        }
+
+        // Collect lines up to and including the closing empty line.
+        loop {
+            match self.inner.read_until(b'\n', &mut self.buf) {
+                Err(e) => return Some(Err(ReadPicaError::from(e))),
+                // `read_until` yields `Ok(0)` at end of file; stop
+                // here, otherwise this loop would never terminate.
+                Ok(0) => {
+                    return Some(Err(ReadPicaError::Other(
+                        "unexpected end of file".into(),
+                    )));
+                }
+                Ok(n) => {
+                    if n == 2 {
+                        break;
+                    }
+                }
+            }
+        }
+
+        // Byte mapping: `C6 92` -> 0x1F, CRLF -> 0x1E, and a doubled
+        // 0x1E (left by the closing empty line) -> 0x1E + LF.
+        self.buf = self
+            .buf
+            .replace(b"\xc6\x92", b"\x1f")
+            .replace(b"\x0D\x0A", b"\x1E")
+            .replace(b"\x1E\x1E", b"\x1E\x0A");
+
+        match ByteRecord::from_bytes(&self.buf) {
+            Ok(record) => Some(Ok(record)),
+            Err(err) => Some(Err(ReadPicaError::Parse {
+                msg: "invalid record".into(),
+                err,
+            })),
+        }
+    }
+
+    fn next_string_record(&mut self) -> Option<Self::StringItem<'_>> {
+        todo!()
+    }
+}
diff --git a/crates/pica-cli/src/commands/convert/mod.rs b/crates/pica-cli/src/commands/convert/mod.rs
index 3daa2d169..a56e332d8 100644
--- a/crates/pica-cli/src/commands/convert/mod.rs
+++ b/crates/pica-cli/src/commands/convert/mod.rs
@@ -9,9 +9,11 @@ use self::import::ImportWriter;
 use self::json::JsonWriter;
 use self::plain::PlainWriter;
 use self::xml::XmlWriter;
+use crate::commands::convert::download::DownloadReader;
 use crate::prelude::*;
 
 mod binary;
+mod download;
 mod import;
 mod json;
 mod plain;
@@ -20,6 +22,7 @@ mod xml;
 #[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)]
 enum Format {
     Binary,
+    Download,
     Import,
     Json,
     Plain,
@@ -74,13 +77,6 @@ impl Convert {
         let skip_invalid = self.skip_invalid || config.skip_invalid;
         let mut progress = Progress::new(self.progress);
 
-        if self.from != Format::Plus {
-            return Err(CliError::Other(format!(
-                "convert from {:?} is not supported",
-                self.from
-            )));
-        }
-
         let mut writer: Box<dyn ByteRecordWrite> = match self.to {
             Format::Plus => {
                 WriterBuilder::new().from_path_or_stdout(self.output)?
@@ -90,30 +86,68 @@ impl Convert {
             Format::Json => Box::new(JsonWriter::new(self.output)?),
             Format::Plain => Box::new(PlainWriter::new(self.output)?),
             Format::Xml => Box::new(XmlWriter::new(self.output)?),
+            Format::Download => {
+                return Err(CliError::Other(
+                    "write to download format is not supported".into(),
+                ));
+            }
         };
 
-        for filename in self.filenames {
-            let mut reader =
-                ReaderBuilder::new().from_path(filename)?;
-
-            while let Some(result) = reader.next_byte_record() {
-                match result {
-                    Err(e) if e.skip_parse_err(skip_invalid) => {
-                        progress.update(true);
-                        continue;
+        match self.from {
+            Format::Plus => {
+                for filename in self.filenames {
+                    let mut reader =
+                        ReaderBuilder::new().from_path(filename)?;
+                    while let Some(result) = reader.next_byte_record() {
+                        match result {
+                            Err(e)
+                                if e.skip_parse_err(skip_invalid) =>
+                            {
+                                progress.update(true);
+                                continue;
+                            }
+                            Err(e) => return Err(e.into()),
+                            Ok(ref record) => {
+                                writer.write_byte_record(record)?;
+                                progress.update(false);
+                            }
+                        }
                     }
-                    Err(e) => return Err(e.into()),
-                    Ok(ref record) => {
-                        writer.write_byte_record(record)?;
-                        progress.update(false);
+                }
+            }
+            Format::Download => {
+                for filename in self.filenames {
+                    let mut reader =
+                        DownloadReader::from_path(filename)?;
+                    while let Some(result) = reader.next_byte_record() {
+                        match result {
+                            Err(e)
+                                if e.skip_parse_err(skip_invalid) =>
+                            {
+                                progress.update(true);
+                                continue;
+                            }
+                            Err(e) => return Err(e.into()),
+                            Ok(ref record) => {
+                                writer.write_byte_record(record)?;
+                                progress.update(false);
+                            }
+                        }
                     }
                 }
             }
+            _ => {
+                return Err(CliError::Other(format!(
+                    "convert from {:?} is not supported",
+                    self.from
+                )));
+            }
         }
 
         progress.finish();
+        // Finalize the writer exactly once, after all inputs.
         writer.finish()?;
 
         Ok(ExitCode::SUCCESS)
     }
 }
diff --git a/src/reader.rs b/src/reader.rs
index 8bb89522d..709065b21 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -18,6 +18,8 @@ pub enum ReadPicaError {
     Utf8 { msg: String, err: Utf8Error },
     #[error(transparent)]
     IO(#[from] std::io::Error),
+    #[error("other: {0}")]
+    Other(String),
 }
 
 impl ReadPicaError {