@@ -531,7 +531,13 @@ class DocDetails(Doc):
531
531
)
532
532
doi : str | None = None
533
533
doi_url : str | None = None
534
- doc_id : str | None = None
534
# Optional unique identifier for this document; defaults to None when not supplied.
doc_id: str | None = Field(
    default=None,
    description=(
        "Unique ID for this document. Simple ways to acquire one include"
        " hashing the DOI or stringifying a UUID."
    ),
)
535
541
file_location : str | os .PathLike | None = None
536
542
license : str | None = Field (
537
543
default = None ,
@@ -811,6 +817,29 @@ def __getitem__(self, item: str):
811
817
except AttributeError :
812
818
return self .other [item ]
813
819
820
+ def make_filename (self , title_limit : int | None = 48 ) -> str :
821
+ """
822
+ Make a filesystem-safe filename that has the doc ID appended, but no extension.
823
+
824
+ Args:
825
+ title_limit: Character limit on the title.
826
+
827
+ Returns:
828
+ Filename that is filesystem safe (e.g. non-safe chars are replaced with dash).
829
+ """
830
+ if not self .title or not self .doc_id :
831
+ raise ValueError ("Unable to create filename without both title and doc_id." )
832
+ # SEE: https://stackoverflow.com/a/71199182
833
+ encoded_title = re .sub (
834
+ r"[/\\?%*:|\"<>\x7F\x00-\x1F]" , "-" , self .title [:title_limit ]
835
+ )
836
+ # NOTE: we append the doc ID for a few reasons:
837
+ # 1. Prevent collisions for identical titles
838
+ # SEE: https://stackoverflow.com/a/71761675
839
+ # 2. Filenames shouldn't end in a period,
840
+ # so append the doc ID to circumvent that gotcha
841
+ return "_" .join ((encoded_title , self .doc_id ))
842
+
814
843
@computed_field # type: ignore[prop-decorator]
815
844
@property
816
845
def formatted_citation (self ) -> str :
0 commit comments