Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions crates/pica-cli/src/commands/convert/download.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::path::Path;

use bstr::ByteSlice;
use pica_record::io::{ReadPicaError, RecordsIter};
use pica_record::{ByteRecord, StringRecord};

/// Buffered, file-backed reader used by the `convert` command to read
/// records from a "download" formatted file.
pub(crate) struct DownloadReader {
    /// Buffered handle on the file being read.
    inner: BufReader<File>,
    /// Scratch buffer that holds the bytes of the record currently being
    /// assembled; it is reused from one record to the next.
    buf: Vec<u8>,
}

impl DownloadReader {
    /// Opens the file at `path` and wraps it in a new reader whose record
    /// buffer starts out empty.
    ///
    /// # Errors
    ///
    /// Returns the [`io::Error`] reported by [`File::open`] if the file
    /// cannot be opened.
    pub(crate) fn from_path<P>(path: P) -> io::Result<Self>
    where
        P: AsRef<Path>,
    {
        File::open(path).map(|file| Self {
            inner: BufReader::new(file),
            buf: Vec::new(),
        })
    }
}

/// Record iteration over a "download" formatted file.
///
/// As parsed below, each record is laid out as:
///
/// 1. a header line starting with `SET:`,
/// 2. a line that is exactly two bytes long (an empty `\r\n` line),
/// 3. the field lines of the record,
/// 4. another two-byte line that closes the record.
impl RecordsIter for DownloadReader {
    type ByteItem<'a>
        = Result<ByteRecord<'a>, ReadPicaError>
    where
        Self: 'a;

    type StringItem<'a>
        = Result<StringRecord<'a>, ReadPicaError>
    where
        Self: 'a;

    /// Reads the next record and returns it as a [`ByteRecord`] borrowing
    /// from the reader's internal buffer.
    ///
    /// Returns `None` once the input is exhausted before a new header line.
    ///
    /// # Errors
    ///
    /// * [`ReadPicaError::IO`] (via `From<io::Error>`) if reading from the
    ///   file fails.
    /// * [`ReadPicaError::Other`] if the header line does not start with
    ///   `SET:` or is not followed by a two-byte (empty) line.
    /// * [`ReadPicaError::Parse`] if the collected bytes are rejected by
    ///   [`ByteRecord::from_bytes`].
    fn next_byte_record(&mut self) -> Option<Self::ByteItem<'_>> {
        self.buf.clear();

        // Header line; zero bytes read means the input is exhausted.
        match self.inner.read_until(b'\n', &mut self.buf) {
            Err(e) => return Some(Err(ReadPicaError::from(e))),
            Ok(0) => return None,
            Ok(_) => {}
        }

        if !self.buf.starts_with(b"SET:") {
            return Some(Err(ReadPicaError::Other(
                "expected line starting with phrase 'SET:'".into(),
            )));
        }

        // The header must be followed by an empty line ("\r\n", 2 bytes).
        match self.inner.read_until(b'\n', &mut self.buf) {
            Err(e) => return Some(Err(ReadPicaError::from(e))),
            Ok(2) => {}
            Ok(_) => {
                return Some(Err(ReadPicaError::Other(
                    "expected empty line".into(),
                )));
            }
        }

        // Header and separator are not part of the record data.
        self.buf.clear();

        // Accumulate field lines up to and including the closing empty line.
        loop {
            match self.inner.read_until(b'\n', &mut self.buf) {
                Err(e) => return Some(Err(ReadPicaError::from(e))),
                // `Ok(0)` signals end of file. Without this arm a file
                // whose last record lacks the closing empty line would
                // spin here forever; instead the bytes read so far are
                // handed to the parser, which decides whether they form a
                // valid record.
                Ok(0) | Ok(2) => break,
                Ok(_) => {}
            }
        }

        // Byte-level normalisation before parsing:
        //   * 0xC6 0x92 (UTF-8 for U+0192, 'ƒ') -> 0x1F
        //   * "\r\n"                            -> 0x1E
        //   * 0x1E 0x1E                         -> 0x1E '\n'
        // The last step turns the closing empty line into the trailing
        // newline. NOTE(review): presumably 0x1F / 0x1E are the subfield
        // and field delimiters `ByteRecord::from_bytes` expects — confirm.
        self.buf = self
            .buf
            .replace(b"\xc6\x92", b"\x1f")
            .replace(b"\x0D\x0A", b"\x1E")
            .replace(b"\x1E\x1E", b"\x1E\x0A");

        Some(ByteRecord::from_bytes(&self.buf).map_err(|err| {
            ReadPicaError::Parse {
                msg: "invalid record".into(),
                err,
            }
        }))
    }

    /// Reads the next record as a [`StringRecord`].
    ///
    /// # Panics
    ///
    /// Not implemented yet: every call panics via `todo!()`.
    fn next_string_record(&mut self) -> Option<Self::StringItem<'_>> {
        todo!()
    }
}
79 changes: 57 additions & 22 deletions crates/pica-cli/src/commands/convert/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ use self::import::ImportWriter;
use self::json::JsonWriter;
use self::plain::PlainWriter;
use self::xml::XmlWriter;
use crate::commands::convert::download::DownloadReader;
use crate::prelude::*;

mod binary;
mod download;
mod import;
mod json;
mod plain;
Expand All @@ -20,6 +22,7 @@ mod xml;
#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)]
enum Format {
Binary,
Download,
Import,
Json,
Plain,
Expand Down Expand Up @@ -74,13 +77,6 @@ impl Convert {
let skip_invalid = self.skip_invalid || config.skip_invalid;
let mut progress = Progress::new(self.progress);

if self.from != Format::Plus {
return Err(CliError::Other(format!(
"convert from {:?} is not supported",
self.from
)));
}

let mut writer: Box<dyn ByteRecordWrite> = match self.to {
Format::Plus => {
WriterBuilder::new().from_path_or_stdout(self.output)?
Expand All @@ -90,30 +86,69 @@ impl Convert {
Format::Json => Box::new(JsonWriter::new(self.output)?),
Format::Plain => Box::new(PlainWriter::new(self.output)?),
Format::Xml => Box::new(XmlWriter::new(self.output)?),
Format::Download => {
return Err(CliError::Other(
"write to download format is not supported".into(),
));
}
};

for filename in self.filenames {
let mut reader =
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next_byte_record() {
match result {
Err(e) if e.skip_parse_err(skip_invalid) => {
progress.update(true);
continue;
match self.from {
Format::Plus => {
for filename in self.filenames {
let mut reader =
ReaderBuilder::new().from_path(filename)?;
while let Some(result) = reader.next_byte_record() {
match result {
Err(e)
if e.skip_parse_err(skip_invalid) =>
{
progress.update(true);
continue;
}
Err(e) => return Err(e.into()),
Ok(ref record) => {
writer.write_byte_record(record)?;
progress.update(false);
}
}
}
Err(e) => return Err(e.into()),
Ok(ref record) => {
writer.write_byte_record(record)?;
progress.update(false);

writer.finish()?;
}
}
Format::Download => {
for filename in self.filenames {
let mut reader =
DownloadReader::from_path(filename)?;
while let Some(result) = reader.next_byte_record() {
match result {
Err(e)
if e.skip_parse_err(skip_invalid) =>
{
progress.update(true);
continue;
}
Err(e) => return Err(e.into()),
Ok(ref record) => {
writer.write_byte_record(record)?;
progress.update(false);
}
}
}

writer.finish()?;
}
}
_ => {
return Err(CliError::Other(format!(
"convert from {:?} is not supported",
self.from
)));
}
}

progress.finish();
writer.finish()?;

Ok(ExitCode::SUCCESS)
}
}
2 changes: 2 additions & 0 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub enum ReadPicaError {
Utf8 { msg: String, err: Utf8Error },
#[error(transparent)]
IO(#[from] std::io::Error),
#[error("other: {0}")]
Other(String),
}

impl ReadPicaError {
Expand Down
Loading