1616
1717import os
1818
19+ from ..core .remote_state .remote_state_client import RemoteStateClient
20+ from ..core .remote_state .fuse_remote_state import FuseStateClient
1921from ..core .blueprint .blueprint_generator import (
2022 BlueprintGenerator ,
2123 BlueprintGeneratorOutput ,
2224 a3mega_device_type ,
2325 a3ultra_device_type ,
2426 supported_device_types ,
2527)
28+ from ..core .commands import run_command_for_value
2629from ..core .capacity import get_capacity_type
2730from ..core .docker_manager import DockerManager
2831from ..core .gcloud_context import zone_to_region
@@ -49,13 +52,22 @@ def cluster_create(args) -> None:
4952 """
5053 check_gcloud_authenticated ()
5154 prepare_directories ()
52- gcm = prepare_gcluster_manager ()
5355 region = zone_to_region (args .zone )
5456
5557 # unique_name uses shortened hash string, so still name collision is possible
5658 unique_name = get_unique_name (args .project , region , args .cluster )
5759 # prefix is to prevent name collisions for blueprints and also deployments by storing them in prefix directory. Ex.: blueprints/{prefix}/cluster_name_hash
5860 prefix = get_prefix_path (args .project , region )
61+ remote_state_client = None
62+ if args .cluster_state_gcs_bucket is not None :
63+ remote_state_client = FuseStateClient (
64+ bucket = args .cluster_state_gcs_bucket ,
65+ state_directory = os .path .join (blueprints_path , prefix , unique_name ),
66+ prefix = prefix ,
67+ cluster = args .cluster ,
68+ deployment_name = unique_name ,
69+ )
70+ gcm = prepare_gcluster_manager (remote_state_client )
5971
6072 bp = generate_blueprint (blueprint_name = unique_name , args = args , prefix = prefix )
6173
@@ -70,6 +82,8 @@ def cluster_create(args) -> None:
7082 deployment_name = unique_name ,
7183 prefix = prefix ,
7284 )
85+ if args .cluster_state_gcs_bucket is not None :
86+ gcm .upload_state ()
7387
7488 set_cluster_command_code = set_cluster_command (args )
7589 if set_cluster_command_code != 0 :
@@ -89,15 +103,42 @@ def cluster_delete(args) -> None:
89103 """
90104 check_gcloud_authenticated ()
91105 prepare_directories ()
92- gcm = prepare_gcluster_manager ()
93106 region = zone_to_region (args .zone )
107+ unique_name = get_unique_name (args .project , region , args .cluster )
108+ # prefix is to prevent name collisions for blueprints and also deployments by storing them in prefix directory. Ex.: blueprints/{prefix}/cluster_name_hash
109+ prefix = get_prefix_path (args .project , region )
110+ remote_state_client = None
111+ if args .cluster_state_gcs_bucket is not None :
112+ remote_state_client = FuseStateClient (
113+ bucket = args .cluster_state_gcs_bucket ,
114+ state_directory = os .path .join (blueprints_path , prefix , unique_name ),
115+ prefix = prefix ,
116+ cluster = args .cluster ,
117+ deployment_name = unique_name ,
118+ )
119+ gcm = prepare_gcluster_manager (remote_state_client )
94120
95121 # unique_name uses shortened hash string, so still name collision is possible
96122 unique_name = get_unique_name (args .project , region , args .cluster )
97123 # prefix is to prevent name collisions for blueprints and also deployments by storing them in prefix directory. Ex.: blueprints/{prefix}/cluster_name_hash
98- prefix_path = get_prefix_path (args .project , region )
124+ prefix = get_prefix_path (args .project , region )
125+ if args .cluster_state_gcs_bucket is not None :
126+ gcm .download_state ()
127+
128+ bp = BlueprintGeneratorOutput (
129+ blueprint_file = os .path .join (blueprints_path , prefix , unique_name )
130+ + '.yaml' ,
131+ blueprint_dependencies = os .path .join (
132+ blueprints_path , prefix , unique_name
133+ ),
134+ )
99135
100- gcm .destroy_deployment (deployment_name = unique_name , prefix = prefix_path )
136+ gcm .stage_files (
137+ blueprint_file = bp .blueprint_file ,
138+ blueprint_dependencies = bp .blueprint_dependencies ,
139+ prefix = prefix ,
140+ )
141+ gcm .destroy_deployment (deployment_name = unique_name , prefix = prefix )
101142
102143 xpk_exit (0 )
103144
@@ -140,18 +181,35 @@ def check_gcloud_authenticated():
140181 xpk_exit (1 )
141182
142183
def prepare_gcluster_manager(
    remote_state_client: RemoteStateClient | None = None,
) -> GclusterManager:
  """Build a GclusterManager driven by an initialized DockerManager.

  Args:
    remote_state_client: optional client forwarded to GclusterManager as its
      `remote_state_client`. Defaults to None so that call sites written
      before this parameter existed (zero-argument calls) keep working.

  Returns:
    A GclusterManager whose command runner is the initialized DockerManager.
  """
  dm = DockerManager(
      working_dir=gcluster_working_dir, gcloud_cfg_path=gcloud_cfg_path
  )
  # The runner is initialized before it is handed over to the manager.
  dm.initialize()
  return GclusterManager(
      gcluster_command_runner=dm, remote_state_client=remote_state_client
  )
149194
150195
def prepare_blueprint_generator() -> BlueprintGenerator:
  """Create a BlueprintGenerator configured with `blueprints_path` as its storage_path."""
  generator = BlueprintGenerator(storage_path=blueprints_path)
  return generator
153198
154199
def validate_state_gcs_bucket(args):
  """Check the remote-state bucket by running `gcloud storage buckets describe`.

  Args:
    args: parsed command arguments; `args.cluster_state_gcs_bucket` is
      interpolated into the gs:// URL and `args` is also passed to the
      command runner as `global_args`.

  If the command reports a non-zero code, `xpk_exit` is called with that
  code. The second element of the runner's result is ignored.
  """
  describe_bucket = (
      f'gcloud storage buckets describe gs://{args.cluster_state_gcs_bucket}'
  )
  return_code, _ = run_command_for_value(
      describe_bucket,
      'Validate remote state bucket existence.',
      global_args=args,
  )
  if return_code == 0:
    return
  xpk_exit(return_code)
211+
212+
155213def generate_blueprint (
156214 blueprint_name , args , prefix = None
157215) -> BlueprintGeneratorOutput :
@@ -162,6 +220,9 @@ def generate_blueprint(
162220
163221 bpg = prepare_blueprint_generator ()
164222
223+ if args .cluster_state_gcs_bucket is not None :
224+ validate_state_gcs_bucket (args )
225+
165226 if args .device_type in supported_device_types :
166227 if args .device_type == a3mega_device_type :
167228 num_nodes = args .num_nodes if not args .num_nodes is None else 2
@@ -178,6 +239,7 @@ def generate_blueprint(
178239 capacity_type = capacity_type ,
179240 system_node_pool_machine_type = args .default_pool_cpu_machine_type ,
180241 system_node_pool_min_node_count = args .default_pool_cpu_num_nodes ,
242+ gcs_bucket = args .cluster_state_gcs_bucket ,
181243 )
182244 if args .device_type == a3ultra_device_type :
183245 num_nodes = args .num_nodes if not args .num_nodes is None else 2
@@ -195,5 +257,6 @@ def generate_blueprint(
195257 capacity_type = capacity_type ,
196258 system_node_pool_machine_type = args .default_pool_cpu_machine_type ,
197259 system_node_pool_min_node_count = args .default_pool_cpu_num_nodes ,
260+ gcs_bucket = args .cluster_state_gcs_bucket ,
198261 )
199262 return None
0 commit comments