Skip to content

Commit 48d0f25

Browse files
authored
SAC-30405: handle empty initial sync (#87)
* add warning logs for debugging
* more debuggery
* even more debuggery
* moar
* ensure bookmark for stream
* bump singer-python
* add test
* use same bookmarks csv in both tests
* correctly formatted start_date
* bump version and add changelog entry
1 parent dec9a94 commit 48d0f25

File tree

4 files changed

+59
-2
lines changed

4 files changed

+59
-2
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 2.2.4
4+
* Write bookmarks for streams that sync no records [#87](https://github.com/singer-io/tap-s3-csv/pull/87)
5+
36
## 2.2.3
47
* Bumps urllib3 dependency for twistlock compliance [#86](https://github.com/singer-io/tap-s3-csv/pull/86)
58

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from setuptools import setup
44

55
setup(name='tap-s3-csv',
6-
version='2.2.3',
6+
version='2.2.4',
77
description='Singer.io tap for extracting CSV files from S3',
88
author='Stitch',
99
url='https://singer.io',
@@ -14,7 +14,7 @@
1414
'boto3==1.39.8',
1515
'urllib3==2.6.3',
1616
'singer-encodings==0.3.0',
17-
'singer-python==5.14.3',
17+
'singer-python==5.19.0',
1818
'voluptuous==0.15.2',
1919
's3fs==2025.9.0'
2020
],

tap_s3_csv/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ def do_sync(config, catalog, state, sync_start_time):
4040
LOGGER.info("%s: Skipping - not selected", stream_name)
4141
continue
4242

43+
bookmark = singer.get_bookmark(state, stream_name, 'modified_since') or config['start_date']
44+
state = singer.set_bookmark(state, stream_name, 'modified_since', bookmark)
4345
singer.write_state(state)
4446
key_properties = metadata.get(mdata, (), 'table-key-properties')
4547
singer.write_schema(stream_name, stream['schema'], key_properties)

tests/test_bookmarks.py

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime, date, timedelta, time
12
from tap_tester import connections, menagerie, runner
23
from functools import reduce
34
from singer import metadata
@@ -102,3 +103,54 @@ def test_run(self):
102103
records = runner.get_records_from_target_output()
103104
messages = records.get('chickens', {}).get('messages', [])
104105
self.assertEqual(len(messages), 0, msg="Sync'd incorrect count of messages: {}".format(len(messages)))
106+
107+
class S3BookmarksStartDateSucceedsModifiedDate(S3CSVBaseTest):
108+
109+
table_entry = [{'table_name': 'skipped', 'search_prefix': 'tap_tester', 'search_pattern': 'tap_tester/bookmarks.*', 'key_properties': 'name'}]
110+
111+
def setUp(self):
112+
self.conn_id = connections.ensure_connection(self)
113+
114+
def resource_name(self):
115+
return "bookmarks.csv"
116+
117+
def name(self):
118+
return "tap_tester_s3_csv_bookmarks"
119+
120+
def expected_check_streams(self):
121+
return {
122+
'skipped'
123+
}
124+
125+
def expected_sync_streams(self):
126+
return {
127+
'skipped'
128+
}
129+
130+
def expected_pks(self):
131+
return {
132+
'skipped': {"name"}
133+
}
134+
135+
def get_properties(self, original: bool = True):
136+
tomorrow = date.today() + timedelta(days=1)
137+
self.start_date = datetime.combine(tomorrow, time.min).strftime("%Y-%m-%dT%H:%M:%SZ")
138+
139+
return super().get_properties(original) | {'start_date': self.start_date}
140+
141+
def test_run(self):
142+
found_catalogs = self.run_and_verify_check_mode(self.conn_id)
143+
144+
# Select our catalogs
145+
our_catalogs = [c for c in found_catalogs if c.get('tap_stream_id') in self.expected_sync_streams()]
146+
147+
self.perform_and_verify_table_and_field_selection(self.conn_id, our_catalogs)
148+
149+
# Clear state before our run
150+
menagerie.set_state(self.conn_id, {})
151+
152+
# Sync 0 records because start date is after file modified_date
153+
self.run_and_verify_sync(self.conn_id, True)
154+
155+
expected_state = {'bookmarks': {'skipped': {'modified_since': self.start_date}}}
156+
self.assertEqual(menagerie.get_state(self.conn_id), expected_state)

0 commit comments

Comments
 (0)