@@ -100,7 +100,7 @@ def test_single_container_local_mode_local_data(modules_sagemaker_session):
                 delete_local_path(path)


-def test_single_container_local_mode_s3_data_remove_input(modules_sagemaker_session):
+def test_single_container_local_mode_s3_data(modules_sagemaker_session):
     with lock.lock(LOCK_PATH):
         try:
             # upload local data to s3
@@ -163,69 +163,7 @@ def test_single_container_local_mode_s3_data_remove_input(modules_sagemaker_sess
                 delete_local_path(path)


-def test_single_container_local_mode_s3_data_not_remove_input(modules_sagemaker_session):
-    with lock.lock(LOCK_PATH):
-        try:
-            # upload local data to s3
-            session = modules_sagemaker_session
-            bucket = session.default_bucket()
-            session.upload_data(
-                path=os.path.join(SOURCE_DIR, "data/train/"),
-                bucket=bucket,
-                key_prefix="data/train",
-            )
-            session.upload_data(
-                path=os.path.join(SOURCE_DIR, "data/test/"),
-                bucket=bucket,
-                key_prefix="data/test",
-            )
-
-            source_code = SourceCode(
-                source_dir=SOURCE_DIR,
-                entry_script="local_training_script.py",
-            )
-
-            compute = Compute(
-                instance_type="local_cpu",
-                instance_count=1,
-            )
-
-            # read input data from s3
-            train_data = InputData(channel_name="train", data_source=f"s3://{bucket}/data/train/")
-
-            test_data = InputData(channel_name="test", data_source=f"s3://{bucket}/data/test/")
-
-            model_trainer = ModelTrainer(
-                training_image=DEFAULT_CPU_IMAGE,
-                sagemaker_session=modules_sagemaker_session,
-                source_code=source_code,
-                compute=compute,
-                input_data_config=[train_data, test_data],
-                base_job_name="local_mode_single_container_s3_data",
-                training_mode=Mode.LOCAL_CONTAINER,
-                remove_inputs_and_container_artifacts=False,
-            )
-
-            model_trainer.train()
-            assert os.path.exists(os.path.join(CWD, "compressed_artifacts/model.tar.gz"))
-        finally:
-            subprocess.run(["docker", "compose", "down", "-v"])
-            directories = [
-                "compressed_artifacts",
-                "artifacts",
-                "model",
-                "shared",
-                "input",
-                "output",
-                "algo-1",
-            ]
-
-            for directory in directories:
-                path = os.path.join(CWD, directory)
-                delete_local_path(path)
-
-
-def test_multi_container_local_mode_remove_input(modules_sagemaker_session):
+def test_multi_container_local_mode(modules_sagemaker_session):
     with lock.lock(LOCK_PATH):
         try:
             source_code = SourceCode(
@@ -284,65 +222,3 @@ def test_multi_container_local_mode_remove_input(modules_sagemaker_session):
             for directory in directories:
                 path = os.path.join(CWD, directory)
                 delete_local_path(path)
-
-
-def test_multi_container_local_mode_not_remove_input(modules_sagemaker_session):
-    with lock.lock(LOCK_PATH):
-        try:
-            source_code = SourceCode(
-                source_dir=SOURCE_DIR,
-                entry_script="local_training_script.py",
-            )
-
-            distributed = Torchrun(
-                process_count_per_node=1,
-            )
-
-            compute = Compute(
-                instance_type="local_cpu",
-                instance_count=2,
-            )
-
-            train_data = InputData(
-                channel_name="train",
-                data_source=os.path.join(SOURCE_DIR, "data/train/"),
-            )
-
-            test_data = InputData(
-                channel_name="test",
-                data_source=os.path.join(SOURCE_DIR, "data/test/"),
-            )
-
-            model_trainer = ModelTrainer(
-                training_image=DEFAULT_CPU_IMAGE,
-                sagemaker_session=modules_sagemaker_session,
-                source_code=source_code,
-                distributed=distributed,
-                compute=compute,
-                input_data_config=[train_data, test_data],
-                base_job_name="local_mode_multi_container",
-                training_mode=Mode.LOCAL_CONTAINER,
-                remove_inputs_and_container_artifacts=False,
-            )
-
-            model_trainer.train()
-            assert os.path.exists(os.path.join(CWD, "compressed_artifacts/model.tar.gz"))
-            assert os.path.exists(os.path.join(CWD, "algo-1"))
-            assert os.path.exists(os.path.join(CWD, "algo-2"))
-
-        finally:
-            subprocess.run(["docker", "compose", "down", "-v"])
-            directories = [
-                "compressed_artifacts",
-                "artifacts",
-                "model",
-                "shared",
-                "input",
-                "output",
-                "algo-1",
-                "algo-2",
-            ]
-
-            for directory in directories:
-                path = os.path.join(CWD, directory)
-                delete_local_path(path)