Skip to content

Commit e752001

Browse files
committed
Store PDF alongside materialized datasheet outputs
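Copy or reuse the PDF inside the content-hash materialized directory so that the markdown, images, and PDF live together. Return pdf_path from the materialized directory and keep pretty filenames, while deduplicating the cache lookup code paths (the separate PDF and markdown lookup helpers are merged into a single first_valid_file_in_dir helper).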
1 parent a110ec7 commit e752001

File tree

1 file changed

+92
-38
lines changed

1 file changed

+92
-38
lines changed

crates/pcb-diode-api/src/datasheet.rs

Lines changed: 92 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -114,14 +114,15 @@ fn resolve_source_url_datasheet(
114114
let url_cache_dir = url_pdf_cache_dir(&canonical_url)?;
115115
fs::create_dir_all(&url_cache_dir)?;
116116

117-
let (pdf_path, prefetched_process) =
118-
if let Some(cached_pdf) = first_valid_cached_pdf(&url_cache_dir)? {
119-
(cached_pdf, None)
120-
} else {
121-
let (process, downloaded_pdf_path) =
122-
fetch_url_pdf_via_backend(client, auth_token, &canonical_url, &url_cache_dir)?;
123-
(downloaded_pdf_path, Some(process))
124-
};
117+
let (pdf_path, prefetched_process) = if let Some(cached_pdf) =
118+
first_valid_file_in_dir(&url_cache_dir, None, is_valid_cached_pdf)?
119+
{
120+
(cached_pdf, None)
121+
} else {
122+
let (process, downloaded_pdf_path) =
123+
fetch_url_pdf_via_backend(client, auth_token, &canonical_url, &url_cache_dir)?;
124+
(downloaded_pdf_path, Some(process))
125+
};
125126

126127
let execution = ResolveExecution::from_pdf_path(pdf_path, Some(canonical_url))?;
127128
execute_resolve_execution(client, auth_token, execution, prefetched_process)
@@ -137,20 +138,22 @@ fn execute_resolve_execution(
137138
let materialization_id = materialization_id_for_key(&execution.pdf_sha256)?;
138139
let materialized_dir = materialized_dir(&materialization_id);
139140
let markdown_path = materialized_dir.join(inferred_markdown_filename(&execution.pdf_path));
140-
let cached_markdown_path = first_valid_cached_markdown(&materialized_dir, &markdown_path)?;
141+
let cached_markdown_path =
142+
first_valid_file_in_dir(&materialized_dir, Some(&markdown_path), is_valid_markdown_file)?;
141143
let images_dir = materialized_dir.join("images");
142144
let complete_marker = materialized_dir.join(".complete");
143145
let has_materialized_cache = cached_markdown_path.is_some()
144146
&& images_dir.is_dir()
145147
&& is_non_empty_file(&complete_marker)?;
146148

147-
if has_materialized_cache && is_valid_cached_pdf(&execution.pdf_path)? {
149+
if has_materialized_cache {
148150
let cached_markdown_path = cached_markdown_path
149151
.context("Materialized cache is marked complete but markdown file is missing")?;
152+
let materialized_pdf_path = ensure_materialized_pdf(&materialized_dir, &execution.pdf_path)?;
150153
return Ok(build_resolve_response(
151154
&cached_markdown_path,
152155
&images_dir,
153-
&execution.pdf_path,
156+
&materialized_pdf_path,
154157
execution.datasheet_url,
155158
));
156159
}
@@ -193,11 +196,12 @@ fn execute_resolve_execution(
193196
&images_dir,
194197
&complete_marker,
195198
)?;
199+
let materialized_pdf_path = ensure_materialized_pdf(&materialized_dir, &execution.pdf_path)?;
196200

197201
Ok(build_resolve_response(
198202
&markdown_path,
199203
&images_dir,
200-
&execution.pdf_path,
204+
&materialized_pdf_path,
201205
execution.datasheet_url,
202206
))
203207
}
@@ -473,8 +477,18 @@ fn url_pdf_cache_dir(canonical_url: &str) -> Result<PathBuf> {
473477
Ok(url_pdf_cache_root_dir().join(key))
474478
}
475479

476-
fn first_valid_cached_pdf(url_cache_dir: &Path) -> Result<Option<PathBuf>> {
477-
let entries = match fs::read_dir(url_cache_dir) {
480+
fn first_valid_file_in_dir(
481+
dir: &Path,
482+
preferred_path: Option<&Path>,
483+
is_valid: fn(&Path) -> Result<bool>,
484+
) -> Result<Option<PathBuf>> {
485+
if let Some(path) = preferred_path {
486+
if is_valid(path)? {
487+
return Ok(Some(path.to_path_buf()));
488+
}
489+
}
490+
491+
let entries = match fs::read_dir(dir) {
478492
Ok(entries) => entries,
479493
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
480494
Err(err) => return Err(err.into()),
@@ -483,45 +497,60 @@ fn first_valid_cached_pdf(url_cache_dir: &Path) -> Result<Option<PathBuf>> {
483497
for entry in entries {
484498
let entry = entry?;
485499
let path = entry.path();
486-
if path.is_file() && is_valid_cached_pdf(&path)? {
500+
if is_valid(&path)? {
487501
return Ok(Some(path));
488502
}
489503
}
490504

491505
Ok(None)
492506
}
493507

494-
fn first_valid_cached_markdown(
495-
materialized_dir: &Path,
496-
preferred_markdown_path: &Path,
497-
) -> Result<Option<PathBuf>> {
498-
if is_non_empty_file(preferred_markdown_path)? {
499-
return Ok(Some(preferred_markdown_path.to_path_buf()));
508+
fn ensure_materialized_pdf(materialized_dir: &Path, source_pdf_path: &Path) -> Result<PathBuf> {
509+
fs::create_dir_all(materialized_dir)?;
510+
let preferred_pdf_path = materialized_dir.join(inferred_pdf_filename(source_pdf_path));
511+
if let Some(existing_pdf_path) = first_valid_file_in_dir(
512+
materialized_dir,
513+
Some(&preferred_pdf_path),
514+
is_valid_materialized_pdf_file,
515+
)? {
516+
return Ok(existing_pdf_path);
517+
}
518+
519+
fs::copy(source_pdf_path, &preferred_pdf_path)
520+
.with_context(|| format!("Failed to copy PDF into {}", preferred_pdf_path.display()))?;
521+
if !is_valid_cached_pdf(&preferred_pdf_path)? {
522+
let _ = fs::remove_file(&preferred_pdf_path);
523+
anyhow::bail!(
524+
"Copied PDF in materialized cache is invalid: {}",
525+
preferred_pdf_path.display()
526+
);
500527
}
501528

502-
let entries = match fs::read_dir(materialized_dir) {
503-
Ok(entries) => entries,
504-
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
505-
Err(err) => return Err(err.into()),
506-
};
529+
Ok(preferred_pdf_path)
530+
}
507531

508-
for entry in entries {
509-
let entry = entry?;
510-
let path = entry.path();
511-
if is_markdown_file(&path) && is_non_empty_file(&path)? {
512-
return Ok(Some(path));
513-
}
514-
}
532+
fn is_markdown_file(path: &Path) -> bool {
533+
is_file_with_extension(path, "md")
534+
}
515535

516-
Ok(None)
536+
fn is_pdf_file(path: &Path) -> bool {
537+
is_file_with_extension(path, "pdf")
517538
}
518539

519-
fn is_markdown_file(path: &Path) -> bool {
540+
fn is_valid_markdown_file(path: &Path) -> Result<bool> {
541+
Ok(is_markdown_file(path) && is_non_empty_file(path)?)
542+
}
543+
544+
fn is_valid_materialized_pdf_file(path: &Path) -> Result<bool> {
545+
Ok(is_pdf_file(path) && is_valid_cached_pdf(path)?)
546+
}
547+
548+
fn is_file_with_extension(path: &Path, extension: &str) -> bool {
520549
path.is_file()
521550
&& path
522551
.extension()
523552
.and_then(|ext| ext.to_str())
524-
.is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
553+
.is_some_and(|ext| ext.eq_ignore_ascii_case(extension))
525554
}
526555

527556
fn infer_source_pdf_filename(source_pdf_url: &str) -> Result<String> {
@@ -629,7 +658,7 @@ mod tests {
629658
let pdf_path = dir.join("blob");
630659
fs::write(&pdf_path, b"%PDF-1.7\n").unwrap();
631660

632-
let found = first_valid_cached_pdf(&dir).unwrap();
661+
let found = first_valid_file_in_dir(&dir, None, is_valid_cached_pdf).unwrap();
633662
assert_eq!(found.as_deref(), Some(pdf_path.as_path()));
634663

635664
fs::remove_dir_all(dir).unwrap();
@@ -643,12 +672,36 @@ mod tests {
643672
fs::write(&existing_markdown, b"# Datasheet\n").unwrap();
644673
let preferred_markdown = dir.join("datasheet.md");
645674

646-
let found = first_valid_cached_markdown(&dir, &preferred_markdown).unwrap();
675+
let found = first_valid_file_in_dir(
676+
&dir,
677+
Some(&preferred_markdown),
678+
is_valid_markdown_file,
679+
)
680+
.unwrap();
647681
assert_eq!(found.as_deref(), Some(existing_markdown.as_path()));
648682

649683
fs::remove_dir_all(dir).unwrap();
650684
}
651685

686+
#[test]
687+
fn test_ensure_materialized_pdf_reuses_existing_pdf_name() {
688+
let dir = std::env::temp_dir().join(format!("datasheet-pdf-cache-dir-{}", Uuid::new_v4()));
689+
fs::create_dir_all(&dir).unwrap();
690+
691+
let existing_pdf = dir.join("ad574a.pdf");
692+
fs::write(&existing_pdf, b"%PDF-1.7\nexisting").unwrap();
693+
694+
let source_pdf =
695+
std::env::temp_dir().join(format!("datasheet-source-{}.pdf", Uuid::new_v4()));
696+
fs::write(&source_pdf, b"%PDF-1.7\nsource").unwrap();
697+
698+
let materialized = ensure_materialized_pdf(&dir, &source_pdf).unwrap();
699+
assert_eq!(materialized, existing_pdf);
700+
701+
fs::remove_dir_all(dir).unwrap();
702+
fs::remove_file(source_pdf).unwrap();
703+
}
704+
652705
#[test]
653706
fn test_extract_datasheet_url_from_symbols_uses_first_valid_value() {
654707
let source = r#"(kicad_symbol_lib
@@ -768,6 +821,7 @@ mod tests {
768821
assert!(value.get("images_dir").is_some());
769822
assert!(value.get("pdf_path").is_some());
770823
assert!(value.get("datasheet_url").is_some());
824+
assert!(value.get("materialized_dir").is_none());
771825
assert!(value.get("sha256").is_none());
772826
assert!(value.get("source_pdf_url").is_none());
773827
}

0 commit comments

Comments
 (0)