@@ -37,6 +37,7 @@ |
37 | 37 | import sys |
38 | 38 | import time |
39 | 39 | import uuid |
| 40 | +import threading |
40 | 41 | from json.decoder import JSONDecodeError |
41 | 42 | from typing import Optional |
42 | 43 | from typing import Sequence |
@@ -66,6 +67,8 @@ |
66 | 67 | from apache_beam.typehints.typehints import Any |
67 | 68 | from apache_beam.utils import retry |
68 | 69 | from apache_beam.utils.histogram import LinearBucket |
| 70 | +from cachetools import TTLCache, cachedmethod |
| 71 | +from cachetools.keys import hashkey |
69 | 72 |
70 | 73 | # Protect against environments where bigquery library is not available. |
71 | 74 | try: |
@@ -139,6 +142,12 @@ class ExportCompression(object): |
139 | 142 | SNAPPY = 'SNAPPY' |
140 | 143 | NONE = 'NONE' |
141 | 144 |
| 145 | +class _NonNoneTTLCache(TTLCache): |
| 146 | + """TTLCache that does not store None values.""" |
| 147 | + def __setitem__(self, key, value): |
| 148 | + if value is not None: |
| 149 | + super().__setitem__(key, value) |
| 150 | + |
142 | 151 |
143 | 152 | def default_encoder(obj): |
144 | 153 | if isinstance(obj, decimal.Decimal): |
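The subclass above exists because `cachetools` memoizes whatever the wrapped call returns, including `None`; a `None` result would otherwise be served from the cache for the full TTL. A minimal standalone sketch of the drop-None behavior (plain `cachetools`, no Beam dependencies; the class name, keys, and `maxsize`/`ttl` values here are illustrative only):

```python
# Standalone sketch: a TTLCache that refuses to memoize None values.
from cachetools import TTLCache


class NonNoneTTLCache(TTLCache):
  """TTLCache that does not store None values."""
  def __setitem__(self, key, value):
    if value is not None:
      super().__setitem__(key, value)


cache = NonNoneTTLCache(maxsize=4, ttl=60)
cache['table_a'] = {'numRows': 10}  # stored normally
cache['table_b'] = None             # silently dropped
assert 'table_a' in cache
assert 'table_b' not in cache       # stays a miss, so callers look it up again
```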
@@ -359,6 +368,9 @@ class BigQueryWrapper(object): |
359 | 368 |
360 | 369 | HISTOGRAM_METRIC_LOGGER = MetricLogger() |
361 | 370 |
| 371 | + _TABLE_CACHE = _NonNoneTTLCache(maxsize=1024, ttl=300) |
| 372 | + _TABLE_CACHE_LOCK = threading.RLock() |
| 373 | + |
362 | 374 | def __init__(self, client=None, temp_dataset_id=None, temp_table_ref=None): |
363 | 375 | self.client = client or BigQueryWrapper._bigquery_client(PipelineOptions()) |
364 | 376 | self.gcp_bq_client = client or gcp_bigquery.Client( |
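Because `_TABLE_CACHE` and `_TABLE_CACHE_LOCK` are class attributes, every `BigQueryWrapper` in the process shares one cache, so repeated lookups of the same table are deduplicated across instances. A minimal sketch of that pattern (hypothetical `Wrapper`/`lookup` names; assumes cachetools >= 5, where the key function receives `self`):

```python
import threading

from cachetools import TTLCache, cachedmethod
from cachetools.keys import hashkey


class Wrapper:
  # One cache per class, not per instance: all Wrapper objects share it.
  _CACHE = TTLCache(maxsize=1024, ttl=300)
  _LOCK = threading.RLock()  # cachedmethod holds this around cache reads/writes

  @cachedmethod(
      cache=lambda self: self._CACHE,
      lock=lambda self: self._LOCK,
      # Key on the arguments only, ignoring self, so instances share entries.
      key=lambda self, name: hashkey(name))
  def lookup(self, name):
    print('fetching', name)
    return name.upper()


a, b = Wrapper(), Wrapper()
a.lookup('books')  # prints 'fetching books', result cached
b.lookup('books')  # served from the shared cache: no print
```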
@@ -788,27 +800,31 @@ def _insert_all_rows( |
788 | 800 | int(time.time() * 1000) - started_millis) |
789 | 801 | return not errors, errors |
790 | 802 |
| 803 | + @cachedmethod( |
| 804 | + cache=lambda self: self._TABLE_CACHE, |
| 805 | + lock=lambda self: self._TABLE_CACHE_LOCK, |
| 806 | + key=lambda self, project_id, dataset_id, table_id: hashkey( |
| 807 | + project_id, dataset_id, table_id), |
| 808 | + ) |
791 | 809 | @retry.with_exponential_backoff( |
792 | | - num_retries=MAX_RETRIES, |
793 | | - retry_filter=retry.retry_on_server_errors_timeout_or_quota_issues_filter) |
| 810 | + num_retries=MAX_RETRIES, |
| 811 | + retry_filter=retry.retry_on_server_errors_timeout_or_quota_issues_filter) |
794 | 812 | def get_table(self, project_id, dataset_id, table_id): |
795 | | - """Lookup a table's metadata object. |
796 | | -
797 | | - Args: |
798 | | - client: bigquery.BigqueryV2 instance |
799 | | - project_id: table lookup parameter |
800 | | - dataset_id: table lookup parameter |
801 | | - table_id: table lookup parameter |
802 | | -
803 | | - Returns: |
804 | | - bigquery.Table instance |
805 | | - Raises: |
806 | | - HttpError: if lookup failed. |
807 | | - """ |
| 813 | + """Look up a table's metadata object (cached in a class-level TTL cache). |
| 814 | +
| 815 | + Args: |
| 816 | + project_id: table lookup parameter |
| 817 | + dataset_id: table lookup parameter |
| 818 | + table_id: table lookup parameter |
| 819 | +
| 820 | + Returns: |
| 821 | + bigquery.Table instance |
| 822 | + Raises: |
| 823 | + HttpError: if lookup failed. |
| 824 | + """ |
808 | 825 | request = bigquery.BigqueryTablesGetRequest( |
809 | | - projectId=project_id, datasetId=dataset_id, tableId=table_id) |
810 | | - response = self.client.tables.Get(request) |
811 | | - return response |
| 826 | + projectId=project_id, datasetId=dataset_id, tableId=table_id) |
| 827 | + return self.client.tables.Get(request) |
812 | 828 |
813 | 829 | def _create_table( |
814 | 830 | self, |
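Decorator order matters here: `@cachedmethod` is outermost, so a cache hit returns immediately and only misses fall through to the retry-wrapped RPC. A sketch of that ordering with a stand-in retry decorator (hypothetical names; not the Beam `retry` module):

```python
import functools
import threading

from cachetools import TTLCache, cachedmethod

calls = []


def with_retries(fn):
  """Stand-in for retry.with_exponential_backoff (no real backoff here)."""
  @functools.wraps(fn)
  def wrapper(*args, **kwargs):
    return fn(*args, **kwargs)  # a real version would loop on server errors
  return wrapper


class Client:
  _CACHE = TTLCache(maxsize=8, ttl=300)
  _LOCK = threading.RLock()

  @cachedmethod(cache=lambda self: self._CACHE, lock=lambda self: self._LOCK)
  @with_retries  # runs only on cache misses because it sits under the cache
  def get_table(self, table_id):
    calls.append(table_id)
    return {'tableId': table_id}


c = Client()
c.get_table('t1')
c.get_table('t1')
assert calls == ['t1']  # the second call never reached the retry layer
```

The flip side of this ordering is that staleness is bounded only by the TTL: a change to the table within the 300-second window is not observed until the cached entry expires.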