Skip to content

Commit 6f53109

Browse files
committed
Trying to figure out a nicer API
1 parent a93dc78 commit 6f53109

File tree

14 files changed

+516
-298
lines changed

14 files changed

+516
-298
lines changed

Cargo.lock

Lines changed: 24 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gedcomesque/examples/gogogo.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,18 @@ use sea_orm::{
1515
#[derive(derive_more::Display, errful::Error, derive_more::From, Debug)]
1616
enum Error {
1717
#[display("I/O error")]
18-
Io { source: std::io::Error },
18+
Io {
19+
source: std::io::Error,
20+
},
1921

2022
#[display("Database error")]
21-
Database { source: sea_orm::DbErr },
22-
23-
#[display("Parse error")]
24-
Parse {
25-
source: gedcomfy::parser::ParseError,
23+
Database {
24+
source: sea_orm::DbErr,
2625
},
26+
27+
Parse(gedcomfy::parser::ParserError<'static>),
28+
29+
FileLoad(gedcomfy::parser::FileLoadError),
2730
}
2831

2932
#[tokio::main(flavor = "current_thread")]
@@ -34,8 +37,8 @@ async fn main() -> ExitResult<Error> {
3437
let opts = ParseOptions::default().force_encoding(SupportedEncoding::Windows1252);
3538
let file_size = { std::fs::File::open(&path)?.metadata()?.len() };
3639
let start_time = Instant::now();
37-
let mut parser = Parser::read_file(path, opts)?;
38-
let records = parser.parse_raw()?;
40+
let mut parser = Parser::with_options(opts).load_file(&path)?;
41+
let records = parser.raw_records().map_err(|e| e.to_static())?;
3942
let elapsed = start_time.elapsed().as_secs_f64();
4043
println!(
4144
"parsed {filename} in {}s: ({} bytes, {} records, {} records/s)",

gedcomfy/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ tracing = { version = "0.1", features = ["attributes"] }
2626
vec1 = "1.12.1"
2727
yoke = { version = "0.7.4", features = ["derive"] }
2828
itertools = "0.14.0"
29+
memmap2 = "0.9.5"
30+
dunce = "1.0.5"
2931

3032

3133
[dev-dependencies]

gedcomfy/src/lib.rs

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//! This is a library for parsing and validating GEDCOM files.
22
33
use core::str;
4-
use std::path::Path;
54

65
use miette::{Context, IntoDiagnostic, SourceSpan};
76
use parser::{
@@ -16,6 +15,8 @@ pub mod parser;
1615
pub mod schemas;
1716
pub mod versions;
1817

18+
pub use parser::Parser;
19+
1920
#[derive(
2021
derive_more::Error, derive_more::Display, derive_more::From, Debug, miette::Diagnostic,
2122
)]
@@ -42,28 +43,6 @@ impl<S: GEDCOMSource + ?Sized> RawRecord<'_, S> {
4243
}
4344
}
4445

45-
pub fn validate_file(
46-
path: &Path,
47-
parse_options: ParseOptions,
48-
) -> Result<parser::ValidationResult, miette::Report> {
49-
let mut parser = parser::Parser::read_file(path, parse_options)
50-
.into_diagnostic()
51-
.with_context(|| format!("Parsing file {}", path.display()))?;
52-
53-
Ok(parser.validate()?)
54-
}
55-
56-
pub fn parse_file(
57-
path: &Path,
58-
parse_options: ParseOptions,
59-
) -> Result<parser::ParseResult, miette::Report> {
60-
let mut parser = parser::Parser::read_file(path, parse_options)
61-
.into_diagnostic()
62-
.with_context(|| format!("Parsing file {}", path.display()))?;
63-
64-
Ok(parser.parse()?)
65-
}
66-
6746
#[derive(derive_more::Error, derive_more::Display, Debug, miette::Diagnostic)]
6847
pub enum FileStructureError {
6948
#[display("Missing HEAD record")]

gedcomfy/src/parser/encodings/mod.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,21 +82,21 @@ pub enum EncodingReason {
8282
BOMDetected { bom_length: usize },
8383

8484
#[display(
85-
"this encoding was detected from start of file content (no byte-order mark was present)"
85+
"this encoding was detected from the start of the file content (no byte-order mark was present)"
8686
)]
8787
#[diagnostic(severity(Advice), code(gedcom::encoding_reason::sniffed))]
8888
Sniffed {},
8989

90-
#[display("this encoding was specified in the GEDCOM header")]
90+
#[display("this encoding was used because it was specified in the GEDCOM header")]
9191
#[diagnostic(severity(Advice), code(gedcom::encoding_reason::header))]
9292
SpecifiedInHeader {
9393
#[label("encoding was specified here")]
9494
span: SourceSpan,
9595
},
9696

9797
#[display(
98-
"this encoding was used because it is required by GEDCOM version {version}{}",
99-
if span.is_none() { " (this version was forced by parsing options)" } else { "" }
98+
"this encoding was used because it is required by the GEDCOM version used: {version}{}",
99+
if span.is_none() { " (this version was selected explicitly in the options)" } else { "" }
100100
)]
101101
#[diagnostic(severity(Advice))]
102102
DeterminedByVersion {
@@ -131,7 +131,7 @@ pub enum EncodingError {
131131

132132
#[display(
133133
"GEDCOM version {version}{} requires the encoding to be {version_encoding}, but the file encoding was determined to be {external_encoding}",
134-
if version_span.is_none() { " (this version was forced by parsing options)" } else { "" }
134+
if version_span.is_none() { " (this version was selected explicitly in the options)" } else { "" }
135135
)]
136136
#[diagnostic(code(gedcom::encoding::version_encoding_mismatch))]
137137
VersionEncodingMismatch {
@@ -252,7 +252,7 @@ impl DetectedEncoding {
252252
match self.encoding {
253253
SupportedEncoding::Ascii => {
254254
let ascii_err = match data.as_ascii_str() {
255-
Ok(str) => return Ok(str.as_str().into()),
255+
Ok(ascii_str) => return Ok(ascii_str.as_str().into()),
256256
Err(err) => err,
257257
};
258258

@@ -279,7 +279,10 @@ impl DetectedEncoding {
279279
.copied(),
280280
);
281281

282-
tracing::debug!("data to decode is {}", String::from_utf8_lossy(&to_show));
282+
tracing::debug!(
283+
data_as_utf8 = String::from_utf8_lossy(&to_show).as_ref(),
284+
"data failed to decode"
285+
);
283286

284287
let mut possible_encodings = Vec::new();
285288
for encoding in [SupportedEncoding::Windows1252, SupportedEncoding::Utf8] {
@@ -295,7 +298,7 @@ impl DetectedEncoding {
295298
Ok(decoded) => {
296299
// if we decoded to something containing control characters,
297300
// it’s not valid
298-
if decoded.as_ref().chars().all(|c| !c.is_control()) {
301+
if decoded.chars().all(|c| !c.is_control()) {
299302
possible_encodings.push(PossibleEncoding {
300303
encoding,
301304
data_in_encoding: decoded.into_owned(),

0 commit comments

Comments
 (0)