Skip to content

Commit f254987

Browse files
committed
PR Changes
1 parent 6376b69 commit f254987

File tree

3 files changed

+11
-16
lines changed

3 files changed

+11
-16
lines changed

src/sec_certs/dataset/cc.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Iterator
66
from datetime import datetime
77
from pathlib import Path
8-
from typing import ClassVar, cast
8+
from typing import TYPE_CHECKING, ClassVar, cast
99

1010
import numpy as np
1111
import pandas as pd
@@ -39,6 +39,9 @@
3939
from sec_certs.utils import helpers, sanitization
4040
from sec_certs.utils.profiling import staged
4141

42+
if TYPE_CHECKING:
43+
from sec_certs.converter import PDFConverter
44+
4245

4346
class CCDataset(Dataset[CCCertificate], ComplexSerializableType):
4447
"""
@@ -151,10 +154,6 @@ def to_pandas(self) -> pd.DataFrame:
151154

152155
return df
153156

154-
@property
155-
def dataset_name(self) -> str:
156-
return "CC"
157-
158157
@property
159158
@only_backed(throw=False)
160159
def reports_dir(self) -> Path:

src/sec_certs/dataset/cc_eucc_common.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717
from collections.abc import Callable
1818
from typing import TYPE_CHECKING
1919

20-
from sec_certs.configuration import config
21-
from sec_certs.converter import PDFConverter
2220
from sec_certs.dataset.auxiliary_dataset_handling import (
2321
CCSchemeDatasetHandler,
2422
CPEDatasetHandler,
@@ -59,6 +57,8 @@
5957
from sec_certs.utils.profiling import staged
6058

6159
if TYPE_CHECKING:
60+
from sec_certs.configuration import config
61+
from sec_certs.converter import PDFConverter
6262
from sec_certs.dataset.cc import CCDataset
6363
from sec_certs.dataset.eucc import EUCCDataset
6464

@@ -82,7 +82,7 @@ def download_pdfs(
8282

8383
if not fresh:
8484
logger.info(
85-
f"Downloading {len(certs_to_process)} PDFs of {obj.dataset_name} {doc_type.long}s for which previous download failed."
85+
f"Downloading {len(certs_to_process)} PDFs of {obj.name} {doc_type.long}s for which previous download failed."
8686
)
8787

8888
download_pdf_funcs = {
@@ -94,7 +94,7 @@ def download_pdfs(
9494
cert_processing.process_parallel(
9595
download_pdf_funcs[doc_type],
9696
certs_to_process,
97-
progress_bar_desc=f"Downloading PDFs of {obj.dataset_name} {doc_type.long}s",
97+
progress_bar_desc=f"Downloading PDFs of {obj.name} {doc_type.long}s",
9898
)
9999

100100

@@ -136,7 +136,7 @@ def convert_pdfs(
136136

137137
if not fresh:
138138
logger.info(
139-
f"Converting {len(certs_to_process)} PDFs of {obj.dataset_name} {doc_type.long}s for which previous conversion failed."
139+
f"Converting {len(certs_to_process)} PDFs of {obj.name} {doc_type.long}s for which previous conversion failed."
140140
)
141141

142142
convert_pdf_funcs = {
@@ -152,7 +152,7 @@ def convert_pdfs(
152152
certs_to_process,
153153
config.pdf_conversion_workers,
154154
config.pdf_conversion_max_chunk_size,
155-
progress_bar_desc=f"Converting PDFs of {obj.dataset_name} {doc_type.long}s",
155+
progress_bar_desc=f"Converting PDFs of {obj.name} {doc_type.long}s",
156156
)
157157

158158
obj.update_with_certs(processed_certs)
@@ -189,7 +189,7 @@ def extract_generic(obj: CCDataset | EUCCDataset, doc_type: DocType, worker_func
189189
worker_func,
190190
certs_to_process,
191191
use_threading=False,
192-
progress_bar_desc=f"Extracting {obj.dataset_name} {doc_type.long} {worker_func.__name__.split('_')[-1]}",
192+
progress_bar_desc=f"Extracting {obj.name} {doc_type.long} {worker_func.__name__.split('_')[-1]}",
193193
)
194194
obj.update_with_certs(processed)
195195

src/sec_certs/dataset/eucc.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -138,10 +138,6 @@ def __init__(
138138
),
139139
}
140140

141-
@property
142-
def dataset_name(self) -> str:
143-
return "EUCC"
144-
145141
@property
146142
@only_backed(throw=False)
147143
def reports_dir(self) -> Path:

0 commit comments

Comments
 (0)