Skip to content

Commit d13553f

Browse files
committed
Store PDF alongside materialized datasheet outputs
Copy or reuse the PDF inside the content-hash materialized directory so that the markdown, images, and PDF live together. Return `pdf_path` from the materialized directory and keep human-readable filenames, while merging the duplicated cache-lookup code paths into a single helper.
1 parent a110ec7 commit d13553f

File tree

1 file changed

+93
-38
lines changed

1 file changed

+93
-38
lines changed

crates/pcb-diode-api/src/datasheet.rs

Lines changed: 93 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -114,14 +114,15 @@ fn resolve_source_url_datasheet(
114114
let url_cache_dir = url_pdf_cache_dir(&canonical_url)?;
115115
fs::create_dir_all(&url_cache_dir)?;
116116

117-
let (pdf_path, prefetched_process) =
118-
if let Some(cached_pdf) = first_valid_cached_pdf(&url_cache_dir)? {
119-
(cached_pdf, None)
120-
} else {
121-
let (process, downloaded_pdf_path) =
122-
fetch_url_pdf_via_backend(client, auth_token, &canonical_url, &url_cache_dir)?;
123-
(downloaded_pdf_path, Some(process))
124-
};
117+
let (pdf_path, prefetched_process) = if let Some(cached_pdf) =
118+
first_valid_file_in_dir(&url_cache_dir, None, is_valid_cached_pdf)?
119+
{
120+
(cached_pdf, None)
121+
} else {
122+
let (process, downloaded_pdf_path) =
123+
fetch_url_pdf_via_backend(client, auth_token, &canonical_url, &url_cache_dir)?;
124+
(downloaded_pdf_path, Some(process))
125+
};
125126

126127
let execution = ResolveExecution::from_pdf_path(pdf_path, Some(canonical_url))?;
127128
execute_resolve_execution(client, auth_token, execution, prefetched_process)
@@ -137,20 +138,26 @@ fn execute_resolve_execution(
137138
let materialization_id = materialization_id_for_key(&execution.pdf_sha256)?;
138139
let materialized_dir = materialized_dir(&materialization_id);
139140
let markdown_path = materialized_dir.join(inferred_markdown_filename(&execution.pdf_path));
140-
let cached_markdown_path = first_valid_cached_markdown(&materialized_dir, &markdown_path)?;
141+
let cached_markdown_path = first_valid_file_in_dir(
142+
&materialized_dir,
143+
Some(&markdown_path),
144+
is_valid_markdown_file,
145+
)?;
141146
let images_dir = materialized_dir.join("images");
142147
let complete_marker = materialized_dir.join(".complete");
143148
let has_materialized_cache = cached_markdown_path.is_some()
144149
&& images_dir.is_dir()
145150
&& is_non_empty_file(&complete_marker)?;
146151

147-
if has_materialized_cache && is_valid_cached_pdf(&execution.pdf_path)? {
152+
if has_materialized_cache {
148153
let cached_markdown_path = cached_markdown_path
149154
.context("Materialized cache is marked complete but markdown file is missing")?;
155+
let materialized_pdf_path =
156+
ensure_materialized_pdf(&materialized_dir, &execution.pdf_path)?;
150157
return Ok(build_resolve_response(
151158
&cached_markdown_path,
152159
&images_dir,
153-
&execution.pdf_path,
160+
&materialized_pdf_path,
154161
execution.datasheet_url,
155162
));
156163
}
@@ -193,11 +200,12 @@ fn execute_resolve_execution(
193200
&images_dir,
194201
&complete_marker,
195202
)?;
203+
let materialized_pdf_path = ensure_materialized_pdf(&materialized_dir, &execution.pdf_path)?;
196204

197205
Ok(build_resolve_response(
198206
&markdown_path,
199207
&images_dir,
200-
&execution.pdf_path,
208+
&materialized_pdf_path,
201209
execution.datasheet_url,
202210
))
203211
}
@@ -473,8 +481,18 @@ fn url_pdf_cache_dir(canonical_url: &str) -> Result<PathBuf> {
473481
Ok(url_pdf_cache_root_dir().join(key))
474482
}
475483

476-
fn first_valid_cached_pdf(url_cache_dir: &Path) -> Result<Option<PathBuf>> {
477-
let entries = match fs::read_dir(url_cache_dir) {
484+
fn first_valid_file_in_dir(
485+
dir: &Path,
486+
preferred_path: Option<&Path>,
487+
is_valid: fn(&Path) -> Result<bool>,
488+
) -> Result<Option<PathBuf>> {
489+
if let Some(path) = preferred_path
490+
&& is_valid(path)?
491+
{
492+
return Ok(Some(path.to_path_buf()));
493+
}
494+
495+
let entries = match fs::read_dir(dir) {
478496
Ok(entries) => entries,
479497
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
480498
Err(err) => return Err(err.into()),
@@ -483,45 +501,60 @@ fn first_valid_cached_pdf(url_cache_dir: &Path) -> Result<Option<PathBuf>> {
483501
for entry in entries {
484502
let entry = entry?;
485503
let path = entry.path();
486-
if path.is_file() && is_valid_cached_pdf(&path)? {
504+
if is_valid(&path)? {
487505
return Ok(Some(path));
488506
}
489507
}
490508

491509
Ok(None)
492510
}
493511

494-
fn first_valid_cached_markdown(
495-
materialized_dir: &Path,
496-
preferred_markdown_path: &Path,
497-
) -> Result<Option<PathBuf>> {
498-
if is_non_empty_file(preferred_markdown_path)? {
499-
return Ok(Some(preferred_markdown_path.to_path_buf()));
512+
fn ensure_materialized_pdf(materialized_dir: &Path, source_pdf_path: &Path) -> Result<PathBuf> {
513+
fs::create_dir_all(materialized_dir)?;
514+
let preferred_pdf_path = materialized_dir.join(inferred_pdf_filename(source_pdf_path));
515+
if let Some(existing_pdf_path) = first_valid_file_in_dir(
516+
materialized_dir,
517+
Some(&preferred_pdf_path),
518+
is_valid_materialized_pdf_file,
519+
)? {
520+
return Ok(existing_pdf_path);
521+
}
522+
523+
fs::copy(source_pdf_path, &preferred_pdf_path)
524+
.with_context(|| format!("Failed to copy PDF into {}", preferred_pdf_path.display()))?;
525+
if !is_valid_cached_pdf(&preferred_pdf_path)? {
526+
let _ = fs::remove_file(&preferred_pdf_path);
527+
anyhow::bail!(
528+
"Copied PDF in materialized cache is invalid: {}",
529+
preferred_pdf_path.display()
530+
);
500531
}
501532

502-
let entries = match fs::read_dir(materialized_dir) {
503-
Ok(entries) => entries,
504-
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
505-
Err(err) => return Err(err.into()),
506-
};
533+
Ok(preferred_pdf_path)
534+
}
507535

508-
for entry in entries {
509-
let entry = entry?;
510-
let path = entry.path();
511-
if is_markdown_file(&path) && is_non_empty_file(&path)? {
512-
return Ok(Some(path));
513-
}
514-
}
536+
fn is_markdown_file(path: &Path) -> bool {
537+
is_file_with_extension(path, "md")
538+
}
515539

516-
Ok(None)
540+
fn is_pdf_file(path: &Path) -> bool {
541+
is_file_with_extension(path, "pdf")
517542
}
518543

519-
fn is_markdown_file(path: &Path) -> bool {
544+
fn is_valid_markdown_file(path: &Path) -> Result<bool> {
545+
Ok(is_markdown_file(path) && is_non_empty_file(path)?)
546+
}
547+
548+
fn is_valid_materialized_pdf_file(path: &Path) -> Result<bool> {
549+
Ok(is_pdf_file(path) && is_valid_cached_pdf(path)?)
550+
}
551+
552+
fn is_file_with_extension(path: &Path, extension: &str) -> bool {
520553
path.is_file()
521554
&& path
522555
.extension()
523556
.and_then(|ext| ext.to_str())
524-
.is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
557+
.is_some_and(|ext| ext.eq_ignore_ascii_case(extension))
525558
}
526559

527560
fn infer_source_pdf_filename(source_pdf_url: &str) -> Result<String> {
@@ -629,7 +662,7 @@ mod tests {
629662
let pdf_path = dir.join("blob");
630663
fs::write(&pdf_path, b"%PDF-1.7\n").unwrap();
631664

632-
let found = first_valid_cached_pdf(&dir).unwrap();
665+
let found = first_valid_file_in_dir(&dir, None, is_valid_cached_pdf).unwrap();
633666
assert_eq!(found.as_deref(), Some(pdf_path.as_path()));
634667

635668
fs::remove_dir_all(dir).unwrap();
@@ -643,12 +676,33 @@ mod tests {
643676
fs::write(&existing_markdown, b"# Datasheet\n").unwrap();
644677
let preferred_markdown = dir.join("datasheet.md");
645678

646-
let found = first_valid_cached_markdown(&dir, &preferred_markdown).unwrap();
679+
let found =
680+
first_valid_file_in_dir(&dir, Some(&preferred_markdown), is_valid_markdown_file)
681+
.unwrap();
647682
assert_eq!(found.as_deref(), Some(existing_markdown.as_path()));
648683

649684
fs::remove_dir_all(dir).unwrap();
650685
}
651686

687+
#[test]
688+
fn test_ensure_materialized_pdf_reuses_existing_pdf_name() {
689+
let dir = std::env::temp_dir().join(format!("datasheet-pdf-cache-dir-{}", Uuid::new_v4()));
690+
fs::create_dir_all(&dir).unwrap();
691+
692+
let existing_pdf = dir.join("ad574a.pdf");
693+
fs::write(&existing_pdf, b"%PDF-1.7\nexisting").unwrap();
694+
695+
let source_pdf =
696+
std::env::temp_dir().join(format!("datasheet-source-{}.pdf", Uuid::new_v4()));
697+
fs::write(&source_pdf, b"%PDF-1.7\nsource").unwrap();
698+
699+
let materialized = ensure_materialized_pdf(&dir, &source_pdf).unwrap();
700+
assert_eq!(materialized, existing_pdf);
701+
702+
fs::remove_dir_all(dir).unwrap();
703+
fs::remove_file(source_pdf).unwrap();
704+
}
705+
652706
#[test]
653707
fn test_extract_datasheet_url_from_symbols_uses_first_valid_value() {
654708
let source = r#"(kicad_symbol_lib
@@ -768,6 +822,7 @@ mod tests {
768822
assert!(value.get("images_dir").is_some());
769823
assert!(value.get("pdf_path").is_some());
770824
assert!(value.get("datasheet_url").is_some());
825+
assert!(value.get("materialized_dir").is_none());
771826
assert!(value.get("sha256").is_none());
772827
assert!(value.get("source_pdf_url").is_none());
773828
}

0 commit comments

Comments
 (0)