Skip to content

Commit a6f7e18

Browse files
committed
Check BSS existence directly - see HEA-572
1 parent b2adb37 commit a6f7e18

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

pipelines/assets/base.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from typing import Optional
99

1010
import django
11+
import fsspec
1112
import msoffcrypto
1213
import openpyxl
1314
import pandas as pd
@@ -123,6 +124,16 @@ def completed_bss_metadata(config: BSSMetadataConfig, bss_metadata) -> Output[pd
123124
"""
124125
A DataFrame containing the BSS Metadata that has been completed sufficiently to allow the BSS to be loaded.
125126
"""
127+
# Exclude BSS where the listed BSS file does not exist
128+
protocol, bss_files_folder = config.bss_files_folder.split("://")
129+
# The listings cache is disabled because otherwise gdrivefs returns an empty list of files
130+
fs = fsspec.filesystem(protocol, use_listings_cache=False, **config.bss_files_storage_options)
131+
bss_paths = []
132+
for dirpath, dirnames, filenames in fs.walk(bss_files_folder):
133+
bss_paths.extend([os.path.join(dirpath, filename)[len(bss_files_folder) + 1 :] for filename in filenames])
134+
bss_metadata = bss_metadata[bss_metadata["bss_path"].isin(bss_paths)].sort_values(by="bss_path")
135+
136+
# Exclude those BSS where one or more mandatory columns have not been completed
126137
required_columns = [
127138
"bss_path",
128139
"code",
@@ -135,10 +146,8 @@ def completed_bss_metadata(config: BSSMetadataConfig, bss_metadata) -> Output[pd
135146
"reference_year_end_date",
136147
"valid_from_date",
137148
]
138-
bss_metadata = bss_metadata[bss_metadata["bss_exists"]].sort_values(by="bss_path")
139149
mask = bss_metadata[required_columns].map(lambda x: x == "")
140150

141-
# Drop rows where any of the specified columns have empty strings
142151
complete_df = bss_metadata[~mask.any(axis="columns")]
143152
incomplete_df = bss_metadata[mask.any(axis="columns")]
144153

0 commit comments

Comments
 (0)