|
| 1 | +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 2 | +# See https://llvm.org/LICENSE.txt for license information. |
| 3 | +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 4 | +"""Caches .lit_test_times.txt files between premerge invocations. |
| 5 | +
|
| 6 | +.lit_test_times.txt files are used by lit to order tests to best take advantage |
| 7 | +of parallelism. Having them around and up to date can result in a ~15% |
| 8 | +improvement in test times. This script handles downloading cached test time |
| 9 | +files and uploading new versions to the GCS buckets used for caching. |
| 10 | +""" |
| 11 | + |
| 12 | +import sys |
| 13 | +import os |
| 14 | +import logging |
| 15 | +import multiprocessing.pool |
| 16 | +import pathlib |
| 17 | +import glob |
| 18 | + |
| 19 | +from google.cloud import storage |
| 20 | + |
| 21 | +GCS_PARALLELISM = 100 |
| 22 | + |
| 23 | + |
def _maybe_upload_timing_file(bucket, timing_file_path):
    """Upload one timing file to the bucket if it exists on disk.

    The file is stored in a blob named "lit_timing/" followed by
    `timing_file_path` exactly as given. A path that does not exist is
    silently skipped.

    Args:
        bucket: Object whose `blob(name)` returns something with an
            `upload_from_filename` method.
        timing_file_path: Path of the local timing file to upload.
    """
    if not os.path.exists(timing_file_path):
        return
    blob_name = "lit_timing/" + timing_file_path
    bucket.blob(blob_name).upload_from_filename(timing_file_path)
| 28 | + |
| 29 | + |
def upload_timing_files(storage_client, bucket_name: str):
    """Upload every .lit_test_times.txt found below the current directory.

    The files are located with a recursive glob and uploaded concurrently on
    a thread pool of GCS_PARALLELISM workers. An exception raised by any
    individual upload is re-raised here when its result is collected.

    Args:
        storage_client: Client object; only its `bucket` method is used.
        bucket_name: Name of the bucket that receives the timing files.
    """
    target_bucket = storage_client.bucket(bucket_name)
    with multiprocessing.pool.ThreadPool(GCS_PARALLELISM) as workers:
        pending = [
            workers.apply_async(_maybe_upload_timing_file, (target_bucket, path))
            for path in glob.glob("**/.lit_test_times.txt", recursive=True)
        ]
        # Collect results before leaving the `with`; get() also surfaces any
        # exception raised in a worker.
        for result in pending:
            result.get()
    print("Done uploading")
| 43 | + |
| 44 | + |
def _maybe_download_timing_file(blob):
    """Download one cached timing file to its relative path on disk.

    The local destination is the blob name with a leading "lit_timing/"
    removed, interpreted relative to the current working directory. Missing
    parent directories are created first.

    Blobs that do not name a file (nothing left after removing the prefix, or
    a name ending in "/", such as a folder placeholder object) are skipped.

    Args:
        blob: Object exposing `name` and `download_to_filename`; presumably a
            google.cloud.storage blob -- confirm against the caller.
    """
    file_name = blob.name.removeprefix("lit_timing/")
    # There is no file to write for a placeholder object, and trying to open
    # "" or a directory path for writing would raise and abort the whole run.
    if not file_name or file_name.endswith("/"):
        return
    pathlib.Path(file_name).parent.mkdir(parents=True, exist_ok=True)
    blob.download_to_filename(file_name)
| 49 | + |
| 50 | + |
def download_timing_files(storage_client, bucket_name: str):
    """Download all cached timing files from the bucket.

    Lists the blobs stored under the "lit_timing/" prefix and fetches them
    concurrently on a thread pool of GCS_PARALLELISM workers. An exception
    raised by any individual download is re-raised here when its result is
    collected.

    Args:
        storage_client: Client object; only its `bucket` method is used.
        bucket_name: Name of the bucket holding the cached timing files.
    """
    bucket = storage_client.bucket(bucket_name)
    # The trailing slash matters: a bare "lit_timing" prefix would also match
    # unrelated names such as "lit_timing_old/...", which would then be
    # written to disk under their full, unstripped names.
    blobs = bucket.list_blobs(prefix="lit_timing/")
    with multiprocessing.pool.ThreadPool(GCS_PARALLELISM) as thread_pool:
        futures = [
            thread_pool.apply_async(_maybe_download_timing_file, (timing_file_blob,))
            for timing_file_blob in blobs
        ]
        # Wait inside the `with`: leaving it terminates the pool, and get()
        # is what surfaces exceptions raised in the workers.
        for future in futures:
            future.get()
    print("Done downloading")
| 65 | + |
| 66 | + |
if __name__ == "__main__":
    # Entry point: expects exactly one argument, "upload" or "download", and
    # reads the bucket name from the CACHE_GCS_BUCKET environment variable.
    handlers = {
        "download": download_timing_files,
        "upload": upload_timing_files,
    }
    action = sys.argv[1] if len(sys.argv) == 2 else None
    handler = handlers.get(action)
    # Reject a bad invocation before constructing the storage client or
    # reading the environment, so a usage mistake is always reported as one.
    if handler is None:
        logging.fatal("Expected usage is cache_lit_timing_files.py <upload/download>")
        sys.exit(1)
    storage_client = storage.Client()
    bucket_name = os.environ["CACHE_GCS_BUCKET"]
    handler(storage_client, bucket_name)
0 commit comments