|
| 1 | +""" |
| 2 | +Database COPY utilities for streaming large result sets directly to CSV. |
| 3 | +
|
| 4 | +This module provides functions to execute PostgreSQL COPY commands for efficient |
| 5 | +CSV generation. COPY runs entirely in the database engine, avoiding the overhead |
| 6 | +of Python object serialization. |
| 7 | +""" |
| 8 | + |
| 9 | +from io import BytesIO |
| 10 | +from typing import Any |
| 11 | +from uuid import UUID |
| 12 | + |
| 13 | +from flask import current_app |
| 14 | +from sqlalchemy import case, desc, func, text |
| 15 | +from sqlalchemy.orm import aliased |
| 16 | + |
| 17 | +from app import db |
| 18 | +from app.constants import NOTIFICATION_REPORT_REQUEST_MAPPING |
| 19 | +from app.models import ApiKey, Job, Notification, TemplateHistory, User |
| 20 | + |
| 21 | + |
def build_notifications_copy_query(
    service_id: UUID,
    notification_type: str,
    notification_statuses: list[str],
    limit_days: int,
    chunk_size: int,
    older_than_id: UUID | None = None,
) -> str:
    """Build a fully-literal SQL string selecting one chunk of notifications.

    The returned SQL is compiled with ``literal_binds`` so it can be embedded
    verbatim inside a PostgreSQL ``COPY (...) TO STDOUT`` command (COPY cannot
    take bound parameters).

    Args:
        service_id: Service whose notifications are selected.
        notification_type: One of the notification types ("email", "sms", "letter").
        notification_statuses: Status values to filter on; empty list means no
            status filter.
        limit_days: Only include notifications created within this many days.
        chunk_size: Maximum number of rows returned (pagination chunk).
        older_than_id: If given, only notifications created strictly before the
            ``created_at`` of this notification are returned (keyset pagination).

    Returns:
        A SQL string with all values rendered as literals. The last two
        selected columns (``id``, ``created_at``) exist only for pagination and
        are stripped before CSV export by the caller.
    """
    # Aliases for table names
    n = aliased(Notification)
    t = aliased(TemplateHistory)
    j = aliased(Job)
    u = aliased(User)
    a = aliased(ApiKey)

    # Recipient column: for letters, take the first address line and trim
    # leading/trailing spaces and a trailing comma; otherwise use `to` as-is.
    recipient_col = case(
        (n.notification_type == "letter", func.regexp_replace(func.split_part(n.to, "\n", 1), "^ +| +$| ,$", "", "g")),
        else_=n.to,
    ).label("Recipient")

    # Human-readable message for permanent failures, varying by channel.
    permanent_failure_msg = case(
        (n.notification_type == "email", "Email address doesn't exist"),
        (n.notification_type == "sms", "Phone number doesn't exist"),
        else_="Permanent failure",
    )

    # Map internal status codes to user-facing labels; unknown statuses pass
    # through unchanged.
    status_col = case(
        (n.status == "created", "Sending"),
        (n.status == "sending", "Sending"),
        (n.status == "pending", "Sending"),
        (n.status == "sent", "Sent"),
        (n.status == "delivered", "Delivered"),
        (n.status == "pending-virus-check", "Pending virus check"),
        (n.status == "virus-scan-failed", "Virus detected"),
        (n.status == "returned-letter", "Returned letter"),
        (n.status == "failed", "Failed"),
        (n.status == "technical-failure", "Tech issue"),
        (n.status == "temporary-failure", "Inbox not accepting messages right now"),
        (n.status == "permanent-failure", permanent_failure_msg),
        (n.status == "cancelled", "Cancelled"),
        (n.status == "validation-failed", "Validation failed"),
        else_=n.status,
    ).label("Status")

    # Render created_at (stored as naive UTC) in Europe/London local time.
    time_col = func.to_char(
        func.timezone("Europe/London", func.timezone("UTC", n.created_at)), "YYYY-MM-DD HH24:MI:SS"
    ).label("Time")

    # Build query; NULL-able joined columns are coalesced to "" so the CSV has
    # no literal NULLs.
    query = (
        db.session.query(
            recipient_col,
            func.coalesce(n.client_reference, "").label("Reference"),
            t.name.label("Template"),
            n.notification_type.label("Type"),
            func.coalesce(u.name, "").label("Sent by"),
            func.coalesce(u.email_address, "").label("Sent by email"),
            func.coalesce(j.original_file_name, "").label("Job"),
            status_col,
            time_col,
            func.coalesce(a.name, "").label("API key name"),
            n.id,  # For pagination (not exported to CSV)
            n.created_at,  # For pagination (not exported to CSV)
        )
        .select_from(n)
        .join(t, t.id == n.template_id)
        .outerjoin(j, j.id == n.job_id)
        .outerjoin(u, u.id == n.created_by_id)
        .outerjoin(a, a.id == n.api_key_id)
    )

    # Add filters. Coerce limit_days through int() so a non-integer value can
    # never be interpolated into the raw interval literal (this fragment is
    # rendered verbatim into SQL, so it must be injection-proof).
    query = query.filter(
        n.service_id == service_id,
        n.notification_type == notification_type,
        n.created_at >= func.now() - text(f"interval '{int(limit_days)} days'"),
        n.key_type != "test",
    )

    if notification_statuses:
        query = query.filter(n.status.in_(notification_statuses))

    if older_than_id is not None:
        # Keyset pagination: fetch notifications older than the anchor row's
        # created_at, resolved via a scalar subquery scoped to the same service.
        older_than_created_at = db.session.query(Notification.created_at).filter(
            Notification.id == older_than_id, Notification.service_id == service_id
        ).scalar_subquery()
        query = query.filter(n.created_at < older_than_created_at)

    # Newest first; id is the tiebreaker for rows sharing a created_at.
    query = query.order_by(desc(n.created_at), desc(n.id)).limit(chunk_size)

    # Compile to a literal SQL string (no bind parameters) for use inside COPY.
    compiled = query.statement.compile(dialect=db.engine.dialect, compile_kwargs={"literal_binds": True})
    return str(compiled)
| 120 | + |
def execute_copy_to_bytes(
    query: str,
    include_header: bool = True,
) -> tuple[bytes, UUID | None, int]:
    """Run *query* through PostgreSQL COPY and return the CSV output as bytes.

    The query is expected to be a fully-literal SQL string (no bind parameters)
    whose last two columns are ``id`` and ``created_at``; those two columns are
    used only for pagination bookkeeping and are excluded from the CSV.

    Args:
        query: Literal SQL produced by ``build_notifications_copy_query``.
        include_header: Whether the CSV output starts with a header row.

    Returns:
        Tuple of (csv_bytes, last_id, row_count) where ``last_id`` is the
        ``id`` of the last (oldest) row in the chunk, or ``None`` when the
        chunk is empty.
    """
    # Get a raw DBAPI connection from the pool; copy_expert is a
    # psycopg2-specific API not exposed through SQLAlchemy.
    conn = db.engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            # NOTE(review): the query runs twice — once here to obtain the
            # pagination anchor, and again inside COPY below. Acceptable for
            # now, but worth revisiting if chunks get large.
            cursor.execute(query.strip())
            rows = cursor.fetchall()
            row_count = len(rows)

            # The last two columns are id and created_at (pagination only);
            # second-to-last is the ID of the oldest row in this chunk.
            last_id = rows[-1][-2] if rows else None

            # COPY the same data out as CSV, excluding the pagination columns.
            copy_query = f"""
                COPY (
                    SELECT
                        "Recipient",
                        "Reference",
                        "Template",
                        "Type",
                        "Sent by",
                        "Sent by email",
                        "Job",
                        "Status",
                        "Time",
                        "API key name"
                    FROM ({query.strip()}) AS subquery
                ) TO STDOUT WITH CSV {"HEADER" if include_header else ""}
            """

            # Stream the COPY output into an in-memory buffer.
            buffer = BytesIO()
            cursor.copy_expert(copy_query, buffer)
            csv_bytes = buffer.getvalue()

            # Lazy %-style args so formatting is skipped when INFO is disabled.
            current_app.logger.info(
                "COPY command executed successfully. Rows: %s, Size: %s bytes, Last ID: %s",
                row_count,
                len(csv_bytes),
                last_id,
            )

            return csv_bytes, last_id, row_count
        finally:
            # Release the cursor explicitly rather than relying on conn.close().
            cursor.close()
    finally:
        # Always return the raw connection to the pool.
        conn.close()
| 181 | + |
| 182 | + |
def get_notifications_csv_chunk(
    service_id: UUID,
    notification_type: str,
    notification_status_filter: str,
    limit_days: int,
    chunk_size: int,
    older_than_id: UUID | None = None,
    include_header: bool = True,
) -> tuple[bytes, UUID | None, int]:
    """Fetch one CSV chunk of notifications for a service.

    Translates the high-level status filter into concrete statuses, builds the
    literal chunk query, and streams it out via COPY.

    Returns:
        Tuple of (csv_bytes, last_id, row_count) as produced by
        ``execute_copy_to_bytes``.
    """
    # Unknown filters map to an empty list, i.e. no status filtering.
    statuses = NOTIFICATION_REPORT_REQUEST_MAPPING.get(notification_status_filter, [])

    sql = build_notifications_copy_query(
        service_id=service_id,
        notification_type=notification_type,
        notification_statuses=statuses,
        limit_days=limit_days,
        chunk_size=chunk_size,
        older_than_id=older_than_id,
    )

    return execute_copy_to_bytes(sql, include_header=include_header)
0 commit comments