Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions src/nemosis/data_fetch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def cache_compiler(
**kwargs: additional arguments passed to the pd.to_{fformat}() function

Returns:
Nothing
List[str] of full paths for the processed files
"""
if not _os.path.isdir(raw_data_location):
raise UserInputError("The raw_data_location provided does not exist.")
Expand Down Expand Up @@ -215,7 +215,7 @@ def cache_compiler(
end_time = _datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S")
start_search = _datetime.strptime(start_search, "%Y/%m/%d %H:%M:%S")

_dynamic_data_fetch_loop(
paths = _dynamic_data_fetch_loop(
start_search,
start_time,
end_time,
Expand All @@ -229,7 +229,7 @@ def cache_compiler(
rebuild=rebuild,
write_kwargs=kwargs,
)
return
return paths


def static_table(
Expand Down Expand Up @@ -543,8 +543,11 @@ def _dynamic_data_fetch_loop(
1. If it does, read the data in and write any required files
(parquet or feather).
2. If it does not, download data then do the same as 1.

Returns: List[str] if caching_mode=False, else List[pd.Dataframe]
"""
data_tables = []
final_filenames = []

table_type = _defaults.table_types[table_name]
date_gen = _processing_info_maps.date_gen[table_type](start_search, end_time)
Expand Down Expand Up @@ -635,7 +638,13 @@ def _dynamic_data_fetch_loop(
if data is None or '#' not in filename_stub:
check_for_next_data_chunk = False

return data_tables
if caching_mode and (data is not None):
final_filenames.append(full_filename)

if caching_mode:
return final_filenames
else:
return data_tables


def _perform_column_selection(data, select_columns, full_filename):
Expand Down
38 changes: 38 additions & 0 deletions tests/test_data_fetch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,44 @@ def test_caching_with_select_columns_works(self):
self.assertSequenceEqual(got_columns, expected_columns)
print("Passed")

def test_caching_returns_filenames(self):
    """cache_compiler should return the full paths of the files it cached.

    For every supported output format and a mix of rebuild/column-subset
    options, checks that the returned paths: exist on disk, carry the
    requested format's extension, live inside the cache directory, and
    number one file per month in the requested range.
    """
    start_time = "2024/11/01 00:05:00"
    end_time = "2024/12/30 23:50:00"
    # Range spans November (month 11) through December (month 12) of the
    # same year, so one monthly file per month is expected.
    expected_file_count = 12 - 11 + 1
    table = "DISPATCHREGIONSUM"
    column_subset = ["SETTLEMENTDATE", "REGIONID", "TOTALDEMAND"]

    # Use a dedicated sub-cache: if some other test downloads this same
    # table for a different date range, we'd see an unexpected number of
    # files in the shared cache.
    subcache = os.path.join(defaults.raw_data_cache, 'test_caching_returns_filenames')

    for fmt in ["feather", "parquet", "csv"]:
        for (select_columns, rebuild) in [(None, True), (None, False), (column_subset, True)]:
            filenames = data_fetch_methods.cache_compiler(
                start_time,
                end_time,
                table,
                subcache,
                select_columns=select_columns,
                fformat=fmt,
                rebuild=rebuild,
            )
            self.assertIsNotNone(filenames)
            for filename in filenames:
                # Bug fix: the f-string had no placeholder
                # (f"(unknown) does not exist"), so a failure never
                # reported which file was missing.
                self.assertTrue(
                    os.path.isfile(filename),
                    f"{filename} does not exist"
                )

                self.assertTrue(
                    filename.endswith('.' + fmt),
                    "wrong file format returned"
                )

                # commonpath collapses to the cache dir only when the
                # file sits at or below it.
                self.assertTrue(
                    os.path.commonpath([filename, subcache]) == subcache,
                    "file not inside cache"
                )

            self.assertEqual(len(filenames), expected_file_count, "unexpected number of files returned from cache")


class TestDynamicDataCompilerWithStartDateFiltering(unittest.TestCase):
def setUp(self):
Expand Down