@@ -64,7 +64,7 @@ def _cache_finetune_llm_images(
6464 is_high_priority = 1 , # make it a high priority
6565 has_no_available_workers = 1 ,
6666 # assuming it has no available workers so that it will be at top after reverse sorting
67- last_updated_at = datetime .max ,
67+ last_updated_at = datetime .max . replace ( tzinfo = pytz . utc ) ,
6868 # setting it to max to ensure it will be at top after reverse sorting
6969 )
7070
@@ -135,7 +135,9 @@ async def execute(self, endpoint_infra_states: Dict[str, Tuple[bool, ModelEndpoi
135135 (
136136 state .image not in images_to_cache_priority ["cpu" ]
137137 or last_updated_at .replace (tzinfo = pytz .utc )
138- > images_to_cache_priority ["cpu" ][state .image ].last_updated_at
138+ > images_to_cache_priority ["cpu" ][state .image ].last_updated_at .replace (
139+ tzinfo = pytz .utc
140+ )
139141 )
140142 and self .docker_repository .image_exists (image_tag , repository_name )
141143 ):
@@ -150,7 +152,9 @@ async def execute(self, endpoint_infra_states: Dict[str, Tuple[bool, ModelEndpoi
150152 (
151153 state .image not in images_to_cache_priority [key ]
152154 or last_updated_at .replace (tzinfo = pytz .utc )
153- > images_to_cache_priority [key ][state .image ].last_updated_at
155+ > images_to_cache_priority [key ][
156+ state .image
157+ ].last_updated_at .replace (tzinfo = pytz .utc )
154158 )
155159 and self .docker_repository .image_exists (image_tag , repository_name )
156160 ):
@@ -162,9 +166,13 @@ async def execute(self, endpoint_infra_states: Dict[str, Tuple[bool, ModelEndpoi
162166 continue
163167
164168 images_to_cache = CachedImages (cpu = [], a10 = [], a100 = [], t4 = [])
165- for key , val in images_to_cache_priority .items ():
166- images_to_cache [key ] = sorted ( # type: ignore
167- val .keys (), key = lambda image : val [image ], reverse = True
168- )[:IMAGES_TO_CACHE_PER_INSTANCE_TYPE ]
169+ try :
170+ for key , val in images_to_cache_priority .items ():
171+ images_to_cache [key ] = sorted ( # type: ignore
172+ val .keys (), key = lambda image : val [image ], reverse = True
173+ )[:IMAGES_TO_CACHE_PER_INSTANCE_TYPE ]
174+ logger .info ("sorted images to cache successfully" )
175+ except Exception as exc :
176+ logger .warning (f"sorting had an error. Error message: { exc } . Skipping sorting..." )
169177
170178 await self .image_cache_gateway .create_or_update_image_cache (images_to_cache )
0 commit comments