|
13 | 13 | from linehaul.events.parser import parse, Download, Simple
|
14 | 14 |
|
15 | 15 | from google.api_core import exceptions
|
16 |
| -from google.cloud import bigquery, storage |
| 16 | +from google.cloud import bigquery, storage, pubsub_v1 |
17 | 17 |
|
18 | 18 | _cattr = cattr.Converter()
|
19 | 19 | _cattr.register_unstructure_hook(
|
|
22 | 22 |
|
23 | 23 | DEFAULT_PROJECT = os.environ.get("GCP_PROJECT", "the-psf")
|
24 | 24 | RESULT_BUCKET = os.environ.get("RESULT_BUCKET")
|
| 25 | +PUBSUB_TOPIC = os.environ.get("PUBSUB_TOPIC") |
25 | 26 |
|
26 | 27 | # Multiple datasets can be specified by separating them with whitespace
|
27 | 28 | # Datasets in other projects can be referenced by using the full dataset id:
|
@@ -120,9 +121,12 @@ def process_fastly_log(data, context):
|
120 | 121 |
|
121 | 122 |
|
122 | 123 | def load_processed_files_into_bigquery(event, context):
|
| 124 | + continue_publishing = False |
123 | 125 | if "attributes" in event and "partition" in event["attributes"]:
|
124 | 126 | # Check to see if we've manually triggered the function and provided a partition
|
125 | 127 | partition = event["attributes"]["partition"]
|
| 128 | + if "continue_publishing" in event["attributes"]: |
| 129 | + continue_publishing = bool(event["attributes"]["continue_publishing"]) |
126 | 130 | else:
|
127 | 131 | # Otherwise, this was triggered via cron, use the current time
|
128 | 132 | partition = datetime.datetime.utcnow().strftime("%Y%m%d")
|
@@ -192,3 +196,14 @@ def load_processed_files_into_bigquery(event, context):
|
192 | 196 | with storage_client.batch():
|
193 | 197 | for blob in simple_source_blobs:
|
194 | 198 | blob.delete()
|
| 199 | + |
| 200 | + if continue_publishing and ( |
| 201 | + len(download_source_blobs) > 0 or len(simple_source_blobs) > 0 |
| 202 | + ): |
| 203 | + publisher = pubsub_v1.PublisherClient() |
| 204 | + topic_path = publisher.topic_path(DEFAULT_PROJECT, PUBSUB_TOPIC) |
| 205 | + publisher.publish( |
| 206 | + topic_path, |
| 207 | + partition=partition, |
| 208 | + continue_publishing=str(continue_publishing), |
| 209 | + ) |
0 commit comments