|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +""" |
| 4 | +Probe lfs files. |
| 5 | +For each source file provided as input, this will print: |
| 6 | +* "local", if the source file is not an LFS pointer |
| 7 | +* the sha256 hash, a space character and a transient download link obtained via the LFS protocol otherwise |
| 8 | +""" |
| 9 | + |
| 10 | +import sys |
| 11 | +import pathlib |
| 12 | +import subprocess |
| 13 | +import os |
| 14 | +import shutil |
| 15 | +import json |
| 16 | +import urllib.request |
| 17 | +from urllib.parse import urlparse |
| 18 | +import re |
| 19 | + |
# Absolute paths of the files named on the command line, in the order given.
sources = [pathlib.Path(arg).resolve() for arg in sys.argv[1:]]
# Root of the git work tree containing the sources: git is asked from the
# deepest directory that is common to all of them.
source_dir = subprocess.check_output(
    ["git", "rev-parse", "--show-toplevel"],
    cwd=pathlib.Path(os.path.commonpath(src.parent for src in sources)),
    text=True,
).strip()
| 23 | + |
| 24 | + |
def get_endpoint():
    """Work out the LFS endpoint of the repository at ``source_dir`` and the headers to use with it.

    The endpoint is read from the output of ``git lfs env``. If that output also
    carries an ``SSH=`` entry for the endpoint, ``git-lfs-authenticate`` is run over
    ssh and the ``href`` and ``header`` values of its JSON reply take precedence.
    Extra headers configured in git as ``http.<scheme>://<host>/.extraheader`` are
    added next, and a ``GITHUB_TOKEN`` environment variable, when set, finally
    overrides the ``Authorization`` header.

    Returns:
        An ``(endpoint, headers)`` tuple: the endpoint URL as a string and a dict
        mapping header names to values.

    Raises:
        RuntimeError: if ``git lfs env`` reports no endpoint, or ssh is needed but
            no ssh executable can be found.
        subprocess.CalledProcessError: if ``git lfs env`` or the ssh command fails.
    """
    lfs_env = subprocess.check_output(["git", "lfs", "env"], text=True, cwd=source_dir)
    endpoint = ssh_server = ssh_path = None
    endpoint_re = re.compile(r'Endpoint(?: \(\S+\))?=(\S+)')
    ssh_re = re.compile(r'\s*SSH=(\S*):(.*)')
    for line in lfs_env.splitlines():
        m = endpoint_re.match(line)
        if m:
            if endpoint is None:
                endpoint = m[1]
            else:
                # only the first Endpoint entry (and the SSH entry following it) is used
                break
        m = ssh_re.match(line)
        if m:
            ssh_server, ssh_path = m.groups()
            break
    # explicit raise rather than assert, so the check survives `python -O`
    if not endpoint:
        raise RuntimeError(f"no Endpoint= line found in git lfs env:\n{lfs_env}")
    headers = {
        "Content-Type": "application/vnd.git-lfs+json",
        "Accept": "application/vnd.git-lfs+json",
    }
    if ssh_server:
        # check_output raises CalledProcessError on a non-zero exit status, so a
        # failing ssh is reported as such instead of as a JSON decoding error
        resp = json.loads(subprocess.check_output(
            [*_resolve_ssh_argv(), ssh_server, "git-lfs-authenticate", ssh_path, "download"],
        ))
        endpoint = resp.get("href", endpoint)
        for k, v in resp.get("header", {}).items():
            headers[k.capitalize()] = v
    url = urlparse(endpoint)
    # this is how actions/checkout persist credentials
    # see https://github.com/actions/checkout/blob/44c2b7a8a4ea60a981eaca3cf939b5f4305c123b/src/git-auth-helper.ts#L56-L63
    # (no check=True: the setting is optional and simply yields no output when absent)
    auth = subprocess.run(["git", "config", f"http.{url.scheme}://{url.netloc}/.extraheader"], text=True,
                          stdout=subprocess.PIPE, cwd=source_dir).stdout.strip()
    for header_line in auth.splitlines():
        k, _, v = header_line.partition(": ")
        headers[k.capitalize()] = v
    if "GITHUB_TOKEN" in os.environ:
        headers["Authorization"] = f"token {os.environ['GITHUB_TOKEN']}"
    return endpoint, headers


def _resolve_ssh_argv():
    """Return the argv prefix used to launch ssh, honouring ``GIT_SSH`` and ``GIT_SSH_COMMAND``."""
    import shlex  # local import: only the ssh code path needs it

    configured = os.environ.get("GIT_SSH", os.environ.get("GIT_SSH_COMMAND", "ssh"))
    executable = shutil.which(configured)
    if executable:
        return [executable]
    # GIT_SSH_COMMAND is a shell-style command line that may carry options
    # (e.g. "ssh -i some_key"): retry using only its first word as the program
    words = shlex.split(configured)
    executable = shutil.which(words[0]) if words else None
    if not executable:
        raise RuntimeError("no ssh command found")
    return [executable, *words[1:]]
| 67 | + |
| 68 | + |
| 69 | +# see https://github.com/git-lfs/git-lfs/blob/310d1b4a7d01e8d9d884447df4635c7a9c7642c2/docs/api/basic-transfers.md |
def get_locations(objects):
    """Resolve LFS objects to transient download links with one LFS batch request.

    Args:
        objects: one entry per source file; either a falsy value (the file is not
            an LFS pointer) or a dict with the ``oid`` and ``size`` of the object.

    Returns:
        A list parallel to ``objects``: ``"local"`` for entries that are not LFS
        pointers, ``"<oid> <download href>"`` for the others.

    Raises:
        RuntimeError: if the server reply lacks a requested object or provides no
            download action for it.
    """
    ret = ["local" for _ in objects]
    requested = [o for o in objects if o]
    if not requested:
        # nothing lives in LFS: skip endpoint discovery and do not send an empty batch request
        return ret
    endpoint, headers = get_endpoint()
    req = urllib.request.Request(
        f"{endpoint}/objects/batch",
        headers=headers,
        data=json.dumps({
            "operation": "download",
            "transfers": ["basic"],
            "objects": requested,
            "hash_algo": "sha256",
        }).encode("ascii"),
    )
    with urllib.request.urlopen(req) as resp:
        data = json.load(resp)
    # pair replies with requests through the oid instead of relying on the server
    # echoing the objects back in request order
    by_oid = {o["oid"]: o for o in data["objects"]}
    for i, obj in enumerate(objects):
        if not obj:
            continue
        oid = obj["oid"]
        reply = by_oid.get(oid)
        if reply is None:
            raise RuntimeError(f"LFS server did not return object {oid}: {data}")
        try:
            download_href = reply["actions"]["download"]["href"]
        except KeyError:
            # surface what the server said about this object (e.g. its "error" entry)
            raise RuntimeError(
                f"no download link for LFS object {oid}: {reply.get('error', reply)}"
            ) from None
        ret[i] = f"{oid} {download_href}"
    return ret
| 90 | + |
| 91 | + |
def get_lfs_object(path):
    """Read ``path`` and, if it is a git LFS pointer file, extract the object it points to.

    Args:
        path: the file to inspect.

    Returns:
        ``None`` if the file does not start with the LFS pointer header, otherwise
        a dict ``{"oid": <sha256 hex string>, "size": <int>}``.

    Raises:
        ValueError: if the file starts with the LFS pointer header but lacks the
            ``oid sha256:`` or the ``size`` entry.
    """
    with open(path, 'rb') as fileobj:
        lfs_header = "version https://git-lfs.github.com/spec".encode()
        # read only as much as the header needs, so regular (possibly large) files are cheap to reject
        actual_header = fileobj.read(len(lfs_header))
        if lfs_header != actual_header:
            return None
        sha256 = size = None
        for line in fileobj:
            line = line.decode('ascii').strip()
            if line.startswith("oid sha256:"):
                sha256 = line[len("oid sha256:"):]
            elif line.startswith("size "):
                size = int(line[len("size "):])
        # both entries are required; check the parsed values rather than the last line read
        if not sha256 or size is None:
            raise ValueError("malformed pointer file")
        return {"oid": sha256, "size": size}
| 108 | + |
| 109 | + |
# Classify every source, then print one result line per source, in command-line order.
objects = list(map(get_lfs_object, sources))
for resp in get_locations(objects):
    print(resp)
0 commit comments