|  | 
| 1 |  | -from typing import List | 
|  | 1 | +from typing import Dict, List, Optional, Any | 
| 2 | 2 | 
 | 
| 3 |  | -from google.cloud.bigquery import AccessEntry, Dataset | 
|  | 3 | +from google.cloud.bigquery import AccessEntry, Client, Dataset | 
|  | 4 | +from google.api_core import exceptions as google_exceptions | 
| 4 | 5 | 
 | 
| 5 | 6 | from dbt.adapters.events.logging import AdapterLogger | 
| 6 | 7 | 
 | 
| @@ -45,3 +46,112 @@ def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> | 
| 45 | 46 |     access_entries.append(access_entry) | 
| 46 | 47 |     dataset.access_entries = access_entries | 
| 47 | 48 |     return dataset | 
|  | 49 | + | 
|  | 50 | + | 
def get_dataset_replication_config(client: Client, project: str, dataset: str) -> Dict[str, Any]:
    """Fetch the dataset's current replication layout from INFORMATION_SCHEMA.

    Runs a single query against the dataset's ``SCHEMATA_REPLICAS`` view and
    summarises the returned rows. The lookup is best-effort: any Google API
    error is logged as a warning and reported as "no replicas" rather than
    raised to the caller.

    Args:
        client: BigQuery client used to run the query.
        project: Project ID that owns the dataset.
        dataset: Dataset ID to inspect.

    Returns:
        A dict with two keys: ``"replicas"``, the list of ``replica_location``
        values in row order, and ``"primary"``, the location of the row whose
        ``is_primary_replica`` is truthy (the last such row if there are
        several, ``None`` if there is none or the lookup failed).
    """
    # Query the dataset-scoped INFORMATION_SCHEMA; no extra WHERE needed.
    # NOTE(review): BigQuery's reference describes SCHEMATA_REPLICAS as a
    # region-qualified view with columns such as `location` and
    # `replica_primary_assigned` -- confirm this path and these column names.
    # A rejected query is swallowed below and looks like "no replicas".
    query = (
        "SELECT replica_location, is_primary_replica "
        f"FROM `{project}.{dataset}.INFORMATION_SCHEMA.SCHEMATA_REPLICAS`"
    )
    try:
        # Materialise the rows inside the try so that errors raised while the
        # rows are being fetched are handled the same way as query errors.
        rows = list(client.query(query).result())
    except google_exceptions.GoogleAPIError as exc:
        # GoogleAPIError is the base class of NotFound and BadRequest, so one
        # clause covers everything the lookup is meant to tolerate.
        logger.warning(f"Unable to fetch replication info for `{project}.{dataset}`: {exc}")
        return {"replicas": [], "primary": None}

    replicas: List[str] = []
    primary: Optional[str] = None
    for row in rows:
        replicas.append(row.replica_location)
        if row.is_primary_replica:
            primary = row.replica_location
    return {"replicas": replicas, "primary": primary}
|  | 74 | + | 
|  | 75 | + | 
def needs_replication_update(
    current_config: Dict[str, Any],
    desired_replicas: List[str],
    desired_primary: Optional[str] = None,
) -> bool:
    """Determine if replication configuration needs to be updated.

    Replica locations are compared as sets, so ordering and duplicates are
    ignored. The primary is only compared when ``desired_primary`` is set.

    Args:
        current_config (Dict[str, Any]): Current config with a ``"replicas"``
            list and a ``"primary"`` entry, as returned by
            get_dataset_replication_config. A missing or ``None`` value for
            either key is treated as "nothing configured".
        desired_replicas (List[str]): Desired replica locations. ``None`` is
            tolerated and treated as an empty list.
        desired_primary (Optional[str]): Desired primary replica location;
            ``None`` or an empty string means "no preference".

    Returns:
        bool: True if update is needed, False otherwise
    """
    # `or ()` covers both a missing key and an explicit None, which
    # `.get(key, [])` alone would pass straight to set() and crash on.
    current_replicas = set(current_config.get("replicas") or ())
    desired_replicas_set = set(desired_replicas or ())

    if current_replicas != desired_replicas_set:
        return True

    # Without a requested primary there is nothing further to reconcile.
    if not desired_primary:
        return False
    return current_config.get("primary") != desired_primary
|  | 98 | + | 
|  | 99 | + | 
def _run_replication_ddl(
    client: Client,
    sql: str,
    failure_message: str,
    ignore_already_exists: bool = False,
) -> None:
    """Run one replication DDL statement, warning instead of raising on API errors."""
    try:
        client.query(sql).result()
    except google_exceptions.GoogleAPIError as e:
        # An "already exists" failure means the desired state is already in
        # place, so callers that opt in treat it as success.
        if ignore_already_exists and "already exists" in str(e).lower():
            return
        logger.warning(f"{failure_message}: {e}")


def apply_dataset_replication(
    client: Client,
    project: str,
    dataset: str,
    desired_replicas: List[str],
    desired_primary: Optional[str] = None,
) -> None:
    """Apply replication configuration using ALTER SCHEMA DDL.

    Reads the current configuration, returns early when nothing needs to
    change, and otherwise reconciles it in three steps: add missing replicas,
    switch the primary replica if requested, then drop replicas that are no
    longer wanted. The primary is switched before anything is dropped so that
    an outgoing primary is no longer the primary when its DROP REPLICA runs.

    Every DDL statement is best-effort: a Google API error is logged as a
    warning and the remaining statements still run. Nothing is raised for a
    failed statement.

    Args:
        client: BigQuery client used to run the lookup query and the DDL.
        project: Project ID that owns the dataset.
        dataset: Dataset ID to configure.
        desired_replicas: Replica locations the dataset should end up with.
        desired_primary: Location that should be the primary replica. It is
            only applied when it is also listed in ``desired_replicas``;
            otherwise a warning is logged and the primary is left unchanged.
    """
    current = get_dataset_replication_config(client, project, dataset)

    if not needs_replication_update(current, desired_replicas, desired_primary):
        logger.debug(f"Dataset {project}.{dataset} replication already configured correctly")
        return

    logger.info(f"Configuring replication for dataset {project}.{dataset}")

    current_replicas = set(current.get("replicas") or ())
    desired_replicas_set = set(desired_replicas or ())

    # NOTE(review): confirm the DDL below against the BigQuery reference --
    # in particular whether ADD REPLICA needs OPTIONS(location=...), and the
    # option name/quoting used to promote a replica.

    # Add new replicas. Sorted so the statement and log order is deterministic.
    for location in sorted(desired_replicas_set - current_replicas):
        sql = f"ALTER SCHEMA `{project}.{dataset}` ADD REPLICA `{location}`"
        logger.info(f"Adding replica: {location}")
        _run_replication_ddl(
            client,
            sql,
            f"Failed to add replica {location}",
            ignore_already_exists=True,
        )

    # Set primary replica if specified and different. This runs before the
    # drops so a replica that is currently primary can be removed afterwards.
    if desired_primary:
        if desired_primary not in desired_replicas_set:
            logger.warning(
                f"Desired primary replica '{desired_primary}' is not in desired replicas {sorted(desired_replicas_set)}. "
                "Skipping setting primary replica."
            )
        elif current.get("primary") != desired_primary:
            sql = (
                f"ALTER SCHEMA `{project}.{dataset}` "
                f"SET OPTIONS (default_replica = `{desired_primary}`)"
            )
            logger.info(f"Setting primary replica: {desired_primary}")
            _run_replication_ddl(
                client,
                sql,
                f"Failed to set primary replica '{desired_primary}'",
            )

    # Remove old replicas.
    for location in sorted(current_replicas - desired_replicas_set):
        sql = f"ALTER SCHEMA `{project}.{dataset}` DROP REPLICA `{location}`"
        logger.info(f"Dropping replica: {location}")
        _run_replication_ddl(client, sql, f"Failed to drop replica {location}")
0 commit comments