7
7
* the sha256 hash, a space character and a transient download link obtained via the LFS protocol otherwise
8
8
If --hash-only is provided, the transient URL will not be fetched and printed
9
9
"""
10
-
10
+ import dataclasses
11
11
import sys
12
12
import pathlib
13
13
import subprocess
14
14
import os
15
15
import shutil
16
16
import json
17
+ import typing
17
18
import urllib .request
18
19
from urllib .parse import urlparse
19
20
import re
20
21
import base64
21
22
from dataclasses import dataclass
22
- from typing import Dict
23
23
import argparse
24
24
25
25
@@ -32,76 +32,124 @@ def options():
32
32
33
33
@dataclass
class Endpoint:
    """A git-lfs endpoint together with the HTTP headers used to query it."""

    # label of the endpoint (callers in this file use "default" for the unnamed one)
    name: str
    # base URL of the LFS API for this endpoint
    href: str
    # value of the SSH entry associated with this endpoint, if any
    ssh: typing.Optional[str] = None
    # request headers; a fresh dict per instance (never shared between endpoints)
    headers: typing.Dict[str, str] = dataclasses.field(default_factory=dict)

    def update_headers(
        self,
        d: typing.Union[
            typing.Mapping[str, str], typing.Iterable[typing.Tuple[str, str]]
        ],
    ):
        """Merge header pairs into ``self.headers``, capitalizing each key.

        ``d`` may be an iterable of ``(key, value)`` pairs or a mapping; a
        mapping is accepted so that callers passing a plain dict keep working.
        Keys are normalized with ``str.capitalize`` so that a later
        ``"Authorization" in self.headers`` check is not defeated by casing.
        """
        pairs = d.items() if isinstance(d, typing.Mapping) else d
        self.headers.update((k.capitalize(), v) for k, v in pairs)
40
42
41
43
42
44
# Parse the command line and locate the git checkout containing the sources.
opts = options()
sources = [p.resolve() for p in opts.sources]
# Pass a real list: os.path.commonpath is documented to take a sequence on
# Python versions before 3.13.
source_dir = pathlib.Path(os.path.commonpath([src.parent for src in sources]))
# Replace the common parent with the repository top level reported by git;
# every later git invocation in this file runs with this as its cwd.
source_dir = subprocess.check_output(
    ["git", "rev-parse", "--show-toplevel"], cwd=source_dir, text=True
).strip()
46
50
47
51
48
def get_env(s: str, sep: str = "=") -> typing.Iterable[typing.Tuple[str, str]]:
    """Yield ``(key, value)`` pairs parsed from ``key<sep>value`` lines of ``s``.

    Each line is split at the first occurrence of ``sep``; lines that do not
    contain ``sep`` are skipped. Pairs are yielded in input order and
    duplicate keys are not merged, so wrap the result in ``dict(...)`` when a
    mapping is needed.

    ``sep`` is matched literally: it is escaped before being placed in the
    pattern so separators such as ``.`` or ``|`` are not treated as regex
    metacharacters.
    """
    pattern = re.compile(rf"(.*?){re.escape(sep)}(.*)", re.M)
    for m in pattern.finditer(s):
        yield m.groups()
53
55
54
56
55
57
def git(*args, **kwargs):
    """Run ``git <args>`` in ``source_dir`` and return its stripped stdout.

    Returns ``None`` when git exits with a non-zero status, so callers can
    treat a failing command as "no value". Extra keyword arguments are
    forwarded to ``subprocess.run`` (for example ``input=...``).

    Note: forwarding ``check=True`` makes ``subprocess.run`` raise
    ``CalledProcessError`` on failure instead of letting this function
    return ``None``.
    """
    proc = subprocess.run(
        ("git",) + args, stdout=subprocess.PIPE, text=True, cwd=source_dir, **kwargs
    )
    return proc.stdout.strip() if proc.returncode == 0 else None
62
+
63
+
64
# Matches an endpoint key: plain "Endpoint" or "Endpoint (<name>)"; group 1 is
# the name when present.
endpoint_re = re.compile(r"^Endpoint(?: \((.*)\))?$")


def get_endpoint_addresses() -> typing.Iterable[Endpoint]:
    """Get all lfs endpoints, including SSH if present.

    Parses the key/value output of ``git lfs env`` (run in ``source_dir``) and
    yields one ``Endpoint`` per endpoint key, in output order. The unnamed
    endpoint is given the name ``"default"``. An SSH entry is attached to the
    endpoint that most recently preceded it; an SSH entry seen before any
    endpoint is ignored. No headers are filled in here.
    """
    lfs_env_items = get_env(
        subprocess.check_output(["git", "lfs", "env"], text=True, cwd=source_dir)
    )
    current_endpoint = None
    for k, v in lfs_env_items:
        m = endpoint_re.match(k)
        if m:
            # a new endpoint starts: flush the one collected so far
            if current_endpoint:
                yield current_endpoint
            # keep only the address, dropping whatever follows the first space
            href, _, _ = v.partition(" ")
            current_endpoint = Endpoint(name=m[1] or "default", href=href)
        elif k.strip() == "SSH" and current_endpoint:
            # the SSH key may carry leading indentation in the env output, so
            # compare after stripping rather than against an exact prefix
            current_endpoint.ssh = v
    if current_endpoint:
        yield current_endpoint
84
+
85
+
86
def get_endpoints() -> typing.Iterable[Endpoint]:
    """Yield each LFS endpoint with its href and request headers resolved.

    For every endpoint from ``get_endpoint_addresses()``:

    * the git-lfs JSON content headers are set;
    * if the endpoint has an SSH address, ``git-lfs-authenticate`` is run over
      ssh and the returned ``href``/``header`` values are applied;
    * an ``http.<url>/.extraheader`` git config entry, if any, is merged in;
    * ``GITHUB_TOKEN`` from the environment, if set, overrides Authorization;
    * otherwise ``git credential fill`` is used to build a Basic auth header.

    An endpoint for which ssh fails or times out, or for which no credentials
    can be obtained, is skipped with a warning on stderr instead of aborting.
    """
    for endpoint in get_endpoint_addresses():
        endpoint.headers = {
            "Content-Type": "application/vnd.git-lfs+json",
            "Accept": "application/vnd.git-lfs+json",
        }
        if endpoint.ssh:
            # see https://github.com/git-lfs/git-lfs/blob/main/docs/api/authentication.md
            server, _, path = endpoint.ssh.partition(":")
            ssh_command = shutil.which(
                os.environ.get("GIT_SSH", os.environ.get("GIT_SSH_COMMAND", "ssh"))
            )
            assert ssh_command, "no ssh command found"
            cmd = [
                ssh_command,
                "-oStrictHostKeyChecking=accept-new",
                server,
                "git-lfs-authenticate",
                path,
                "download",
            ]
            try:
                res = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=15)
            except subprocess.TimeoutExpired:
                print(
                    f"WARNING: ssh timed out when connecting to {server}, ignoring {endpoint.name} endpoint",
                    file=sys.stderr,
                )
                continue
            if res.returncode != 0:
                print(
                    f"WARNING: ssh failed when connecting to {server}, ignoring {endpoint.name} endpoint",
                    file=sys.stderr,
                )
                continue
            ssh_resp = json.loads(res.stdout)
            # fall back to the current href (a string), not the Endpoint object
            endpoint.href = ssh_resp.get("href", endpoint.href)
            endpoint.update_headers(ssh_resp.get("header", {}).items())
        url = urlparse(endpoint.href)
        # this is how actions/checkout persist credentials
        # see https://github.com/actions/checkout/blob/44c2b7a8a4ea60a981eaca3cf939b5f4305c123b/src/git-auth-helper.ts#L56-L63
        auth = git("config", f"http.{url.scheme}://{url.netloc}/.extraheader") or ""
        endpoint.update_headers(get_env(auth, sep=": "))
        if os.environ.get("GITHUB_TOKEN"):
            endpoint.headers["Authorization"] = f"token {os.environ['GITHUB_TOKEN']}"
        if "Authorization" not in endpoint.headers:
            # last chance: use git credentials (possibly backed by a credential helper like the one installed by gh)
            # see https://git-scm.com/docs/git-credential
            # no check=True here: a failing `git credential fill` must come
            # back as None so the endpoint is skipped rather than raising
            credentials = git(
                "credential",
                "fill",
                # drop leading / from url.path
                input=f"protocol={url.scheme}\nhost={url.netloc}\npath={url.path[1:]}\n",
            )
            if credentials is None:
                print(
                    f"WARNING: no authorization method found, ignoring {endpoint.name} endpoint",
                    file=sys.stderr,
                )
                continue
            credentials = dict(get_env(credentials))
            auth = base64.b64encode(
                f'{credentials["username"]}:{credentials["password"]}'.encode()
            ).decode("ascii")
            endpoint.headers["Authorization"] = f"Basic {auth}"
        yield endpoint
105
153
106
154
107
155
# see https://github.com/git-lfs/git-lfs/blob/310d1b4a7d01e8d9d884447df4635c7a9c7642c2/docs/api/basic-transfers.md
@@ -115,37 +163,44 @@ def get_locations(objects):
115
163
for i in indexes :
116
164
ret [i ] = objects [i ]["oid" ]
117
165
return ret
118
- endpoint = get_endpoint ()
119
166
data = {
120
167
"operation" : "download" ,
121
168
"transfers" : ["basic" ],
122
169
"objects" : [objects [i ] for i in indexes ],
123
170
"hash_algo" : "sha256" ,
124
171
}
125
- req = urllib .request .Request (
126
- f"{ endpoint .href } /objects/batch" ,
127
- headers = endpoint .headers ,
128
- data = json .dumps (data ).encode ("ascii" ),
129
- )
130
- with urllib .request .urlopen (req ) as resp :
131
- data = json .load (resp )
132
- assert len (data ["objects" ]) == len (indexes ), f"received { len (data )} objects, expected { len (indexes )} "
133
- for i , resp in zip (indexes , data ["objects" ]):
134
- ret [i ] = f'{ resp ["oid" ]} { resp ["actions" ]["download" ]["href" ]} '
135
- return ret
172
+ for endpoint in get_endpoints ():
173
+ req = urllib .request .Request (
174
+ f"{ endpoint .href } /objects/batch" ,
175
+ headers = endpoint .headers ,
176
+ data = json .dumps (data ).encode ("ascii" ),
177
+ )
178
+ try :
179
+ with urllib .request .urlopen (req ) as resp :
180
+ data = json .load (resp )
181
+ except urllib .request .HTTPError as e :
182
+ print (f"WARNING: encountered HTTPError { e } , ignoring endpoint { e .name } " )
183
+ continue
184
+ assert len (data ["objects" ]) == len (
185
+ indexes
186
+ ), f"received { len (data )} objects, expected { len (indexes )} "
187
+ for i , resp in zip (indexes , data ["objects" ]):
188
+ ret [i ] = f'{ resp ["oid" ]} { resp ["actions" ]["download" ]["href" ]} '
189
+ return ret
190
+ raise Exception (f"no valid endpoint found" )
136
191
137
192
138
193
def get_lfs_object(path):
    """Read the git-lfs pointer at ``path`` and return its oid and size.

    Returns ``None`` when the file does not start with the git-lfs spec
    header (i.e. it is not an LFS pointer file). Otherwise returns
    ``{"oid": <sha256 hex string>, "size": <int>}`` taken from the pointer's
    ``oid`` and ``size`` lines.

    Raises ``AssertionError`` if the oid is not of the ``sha256:`` kind.
    """
    with open(path, "rb") as fileobj:
        lfs_header = "version https://git-lfs.github.com/spec".encode()
        actual_header = fileobj.read(len(lfs_header))
        if lfs_header != actual_header:
            return None
        # the remainder of the pointer is a list of "key value" lines
        data = dict(get_env(fileobj.read().decode("ascii"), sep=" "))
    assert data["oid"].startswith("sha256:"), f"unknown oid type: {data['oid']}"
    _, _, sha256 = data["oid"].partition(":")
    size = int(data["size"])
    return {"oid": sha256, "size": size}
150
205
151
206
0 commit comments