Skip to content

Commit 5b63994

Browse files
committed
from-pdf-pt now uses *.pdf as default glob; prepared for release
1 parent c93bf71 commit 5b63994

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

CHANGES.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
Changelog
22
=========
33

4+
0.0.2 (2024-07-05)
5+
------------------
6+
7+
- ``from-pdf-pt`` now uses ``*.pdf`` as default glob
8+
9+
410
0.0.1 (2024-05-02)
511
------------------
612

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ def _read(f):
3131
},
3232
packages=find_namespace_packages(where='src'),
3333
install_requires=[
34-
"llm-dataset-converter>=0.2.1",
34+
"llm-dataset-converter>=0.2.4",
3535
"pypdf",
3636
"simple-range>=0.0.3",
3737
],
38-
version="0.0.1",
38+
version="0.0.2",
3939
author='Peter Reutemann',
4040
author_email='fracpete@waikato.ac.nz',
4141
entry_points={

src/ldc_pdf/pretrain/_pdf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def initialize(self):
102102
Initializes the reading, e.g., for opening files or databases.
103103
"""
104104
super().initialize()
105-
self._inputs = locate_files(self.source, input_lists=self.source_list, fail_if_empty=True)
105+
self._inputs = locate_files(self.source, input_lists=self.source_list, fail_if_empty=True, default_glob="*.pdf")
106106
if self.page_range is None:
107107
self.page_range = "first-last"
108108
if self.combine_pages is None:

0 commit comments

Comments
 (0)