# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import glob
import hashlib
import os
import tempfile

import huggingface_hub.constants

from vllm.model_executor.model_loader.weight_utils import (
    download_weights_from_hf)
from vllm.transformers_utils.runai_utils import (ObjectStorageModel,
                                                 is_runai_obj_uri,
                                                 list_safetensors)
# ... earlier lines of test_runai_list_safetensors_local() not shown ...
    assert len(safetensors) == len(files)
def test_runai_pull_files_gcs(monkeypatch):
    """Download one public Landsat file from GCS and verify its MD5 checksum.

    Uses anonymous credentials (the bucket is public) and pins
    GOOGLE_CLOUD_PROJECT so no default-project lookup is attempted.
    Requires network access to Google Cloud Storage.
    """
    monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
    # Bypass default project lookup by setting GOOGLE_CLOUD_PROJECT
    monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")

    filename = "LT08_L1GT_074061_20130309_20170505_01_T2_MTL.txt"
    gcs_bucket = "gs://gcp-public-data-landsat/LT08/01/074/061/LT08_L1GT_074061_20130309_20170505_01_T2/"
    gcs_url = f"{gcs_bucket}/{filename}"

    model = ObjectStorageModel(gcs_url)
    model.pull_files(gcs_bucket, allow_pattern=[f"*{filename}"])

    # To re-generate / change URLs:
    # gsutil ls -L gs://<gcs-url> | grep "Hash (md5)" | tr -d ' ' \
    # | cut -d":" -f2 | base64 -d | xxd -p
    expected_checksum = "f60dea775da1392434275b311b31a431"

    hasher = hashlib.new("md5")
    with open(os.path.join(model.dir, filename), 'rb') as f:
        # Stream in 4 KiB chunks so large files are never read fully into
        # memory; the walrus loop stops at EOF (empty bytes).
        while chunk := f.read(4096):
            hasher.update(chunk)
    assert hasher.hexdigest() == expected_checksum
0 commit comments