Skip to content

Commit 5585d8e

Browse files
Fix permissions (#431)
* Fix s5cmd env vars * more fixes for s5cmd * dont error * add back aws_profile * flush * fix test
1 parent 8a35e38 commit 5585d8e

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

model-engine/model_engine_server/inference/batch_inference/vllm_batch.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,17 @@ def get_s3_client():
2727

2828

2929
def download_model(checkpoint_path, final_weights_folder):
30-
s5cmd = f"./s5cmd --numworkers 512 sync --concurrency 10 {os.path.join(checkpoint_path, '*')} {final_weights_folder}"
30+
s5cmd = f"./s5cmd --numworkers 512 cp --concurrency 10 --include '*.model' --include '*.json' --include '*.bin' --include '*.safetensors' --exclude 'optimizer*' --exclude 'train*' {os.path.join(checkpoint_path, '*')} {final_weights_folder}"
31+
env = os.environ.copy()
32+
env["AWS_PROFILE"] = os.getenv("S3_WRITE_AWS_PROFILE", "default")
33+
# Need to override these env vars so s5cmd uses AWS_PROFILE
34+
env["AWS_ROLE_ARN"] = ""
35+
env["AWS_WEB_IDENTITY_TOKEN_FILE"] = ""
3136
process = subprocess.Popen(
32-
s5cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
37+
s5cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env
3338
)
3439
for line in process.stdout:
35-
print(line)
40+
print(line, flush=True)
3641

3742
process.wait()
3843

@@ -41,7 +46,7 @@ def download_model(checkpoint_path, final_weights_folder):
4146
for line in iter(process.stderr.readline, ""):
4247
stderr_lines.append(line.strip())
4348

44-
raise IOError(f"Error downloading model weights: {stderr_lines}")
49+
print(f"Error downloading model weights: {stderr_lines}", flush=True)
4550

4651

4752
def file_exists(path):

model-engine/tests/unit/inference/test_vllm_batch.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ async def test_batch_inference(
7474
new_callable=mock_open,
7575
read_data="Mocked content",
7676
)
77-
async def test_batch_inference_failed_to_download_model(
77+
async def test_batch_inference_failed_to_download_model_but_proceed(
7878
mock_open_func,
7979
mock_popen,
8080
mock_get_s3_client,
@@ -86,19 +86,36 @@ async def test_batch_inference_failed_to_download_model(
8686
create_vllm_request_outputs,
8787
mock_s3_client,
8888
mock_process,
89+
mock_completion_output,
8990
):
9091
# Mock the necessary objects and data
91-
mock_process.returncode = 1
92+
mock_process.returncode = 1 # Failed to download model
9293
mock_popen.return_value = mock_process
9394
mock_get_s3_client.return_value = mock_s3_client
9495
mock_create_batch_completions_request.parse_file.return_value = create_batch_completions_request
9596
mock_create_batch_completions_request_content.parse_raw.return_value = (
9697
create_batch_completions_request_content
9798
)
9899

100+
mock_results_generator = MagicMock()
101+
mock_results_generator.__aiter__.return_value = create_vllm_request_outputs
102+
103+
# Mock the generate_with_vllm function
104+
mock_generate_with_vllm.return_value = [mock_results_generator]
105+
99106
# Call the function
100-
with pytest.raises(IOError):
101-
await batch_inference()
107+
await batch_inference()
108+
109+
# Assertions
110+
mock_create_batch_completions_request.parse_file.assert_called_once()
111+
mock_open_func.assert_has_calls(
112+
[
113+
call("input_data_path", "r"),
114+
call("output_data_path", "w"),
115+
call().write(json.dumps([mock_completion_output.dict()])),
116+
],
117+
any_order=True,
118+
)
102119

103120

104121
@pytest.mark.asyncio

0 commit comments

Comments (0)