Skip to content

Commit 22d91bd

Browse files
authored
Merge pull request #17330 from chamikaramj/cherry_pick_pr_17329
[cherry-pick][release-2.38.0][BEAM-14282] Re-raise exceptions swallowed in several Python I/O connectors
2 parents 12bc0e0 + e67cc36 commit 22d91bd

File tree

4 files changed

+57
-10
lines changed

4 files changed

+57
-10
lines changed

sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,8 +301,10 @@ def process(self, query, *unused_args, **unused_kwargs):
301301
except (ClientError, GoogleAPICallError) as e:
302302
# e.code.value contains the numeric http status code.
303303
service_call_metric.call(e.code.value)
304+
raise
304305
except HttpError as e:
305306
service_call_metric.call(e)
307+
raise
306308

307309

308310
class _Mutate(PTransform):

sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -328,13 +328,17 @@ def test_QueryFn_metric_on_failure(self):
328328
client_query.fetch.side_effect = [
329329
exceptions.DeadlineExceeded("Deadline exceed")
330330
]
331-
list(_query_fn.process(self._mock_query))
332-
self.verify_read_call_metric(
333-
self._PROJECT, self._NAMESPACE, "deadline_exceeded", 1)
334-
# Test success
335-
client_query.fetch.side_effect = [[]]
336-
list(_query_fn.process(self._mock_query))
337-
self.verify_read_call_metric(self._PROJECT, self._NAMESPACE, "ok", 1)
331+
try:
332+
list(_query_fn.process(self._mock_query))
333+
except Exception:
334+
self.verify_read_call_metric(
335+
self._PROJECT, self._NAMESPACE, "deadline_exceeded", 1)
336+
# Test success
337+
client_query.fetch.side_effect = [[]]
338+
list(_query_fn.process(self._mock_query))
339+
self.verify_read_call_metric(self._PROJECT, self._NAMESPACE, "ok", 1)
340+
else:
341+
raise Exception('Excepted _query_fn.process call to raise an error')
338342

339343
def verify_read_call_metric(self, project_id, namespace, status, count):
340344
"""Check if a metric was recorded for the Datastore IO read API call."""

sdks/python/apache_beam/io/gcp/gcsio.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -642,6 +642,7 @@ def __init__(self, client, path, buffer_size, get_project_number):
642642
service_call_metric.call('ok')
643643
except HttpError as e:
644644
service_call_metric.call(e)
645+
raise
645646

646647
@retry.with_exponential_backoff(
647648
retry_filter=retry.retry_on_server_errors_and_timeout_filter)

sdks/python/apache_beam/io/gcp/gcsio_test.py

Lines changed: 43 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -103,10 +103,17 @@ def __init__(self):
103103
# has to persist even past the deletion of the object.
104104
self.last_generation = {}
105105
self.list_page_tokens = {}
106+
self._fail_when_getting_metadata = []
107+
self._fail_when_reading = []
106108

107-
def add_file(self, f):
109+
def add_file(
110+
self, f, fail_when_getting_metadata=False, fail_when_reading=False):
108111
self.files[(f.bucket, f.object)] = f
109112
self.last_generation[(f.bucket, f.object)] = f.generation
113+
if fail_when_getting_metadata:
114+
self._fail_when_getting_metadata.append(f)
115+
if fail_when_reading:
116+
self._fail_when_reading.append(f)
110117

111118
def get_file(self, bucket, obj):
112119
return self.files.get((bucket, obj), None)
@@ -123,8 +130,12 @@ def Get(self, get_request, download=None): # pylint: disable=invalid-name
123130
# Failing with an HTTP 404 if file does not exist.
124131
raise HttpError({'status': 404}, None, None)
125132
if download is None:
133+
if f in self._fail_when_getting_metadata:
134+
raise HttpError({'status': 429}, None, None)
126135
return f.get_metadata()
127136
else:
137+
if f in self._fail_when_reading:
138+
raise HttpError({'status': 429}, None, None)
128139
stream = download.stream
129140

130141
def get_range_callback(start, end):
@@ -303,7 +314,15 @@ def __init__(self, project, region, kms_key=None):
303314
'time', time=mock.MagicMock(side_effect=range(100)), sleep=mock.MagicMock())
304315
class TestGCSIO(unittest.TestCase):
305316
def _insert_random_file(
306-
self, client, path, size, generation=1, crc32c=None, last_updated=None):
317+
self,
318+
client,
319+
path,
320+
size,
321+
generation=1,
322+
crc32c=None,
323+
last_updated=None,
324+
fail_when_getting_metadata=False,
325+
fail_when_reading=False):
307326
bucket, name = gcsio.parse_gcs_path(path)
308327
f = FakeFile(
309328
bucket,
@@ -312,7 +331,7 @@ def _insert_random_file(
312331
generation,
313332
crc32c=crc32c,
314333
last_updated=last_updated)
315-
client.objects.add_file(f)
334+
client.objects.add_file(f, fail_when_getting_metadata, fail_when_reading)
316335
return f
317336

318337
def setUp(self):
@@ -841,6 +860,27 @@ def test_downloader_fail_to_get_project_number(self, mock_get):
841860

842861
self.assertEqual(metric_value, 2)
843862

863+
def test_downloader_fail_non_existent_object(self):
864+
file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
865+
with self.assertRaises(IOError):
866+
self.gcs.open(file_name, 'r')
867+
868+
def test_downloader_fail_when_getting_metadata(self):
869+
file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
870+
file_size = 5 * 1024 * 1024 + 100
871+
self._insert_random_file(
872+
self.client, file_name, file_size, fail_when_getting_metadata=True)
873+
with self.assertRaises(HttpError):
874+
self.gcs.open(file_name, 'r')
875+
876+
def test_downloader_fail_when_reading(self):
877+
file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
878+
file_size = 5 * 1024 * 1024 + 100
879+
self._insert_random_file(
880+
self.client, file_name, file_size, fail_when_reading=True)
881+
with self.assertRaises(HttpError):
882+
self.gcs.open(file_name, 'r')
883+
844884
def test_uploader_monitoring_info(self):
845885
# Clear the process wide metric container.
846886
MetricsEnvironment.process_wide_container().reset()

0 commit comments

Comments
 (0)