7
7
* the sha256 hash, a space character and a transient download link obtained via the LFS protocol otherwise
8
8
If --hash-only is provided, the transient URL will not be fetched and printed
9
9
"""
10
-
10
+ import dataclasses
11
11
import sys
12
12
import pathlib
13
13
import subprocess
14
14
import os
15
15
import shutil
16
16
import json
17
+ import typing
17
18
import urllib .request
18
19
from urllib .parse import urlparse
19
20
import re
20
21
import base64
21
22
from dataclasses import dataclass
22
- from typing import Dict
23
23
import argparse
24
24
25
25
@@ -32,11 +32,13 @@ def options():
32
32
33
33
@dataclass
34
34
class Endpoint :
35
+ name : str
35
36
href : str
36
- headers : Dict [str , str ]
37
+ ssh : str | None = None
38
+ headers : typing .Dict [str , str ] = dataclasses .field (default_factory = dict )
37
39
38
- def update_headers (self , d : Dict [ str , str ]):
39
- self .headers .update ((k .capitalize (), v ) for k , v in d . items () )
40
+ def update_headers (self , d : typing . Iterable [ tuple [ str , str ] ]):
41
+ self .headers .update ((k .capitalize (), v ) for k , v in d )
40
42
41
43
42
44
opts = options ()
@@ -47,88 +49,107 @@ def update_headers(self, d: Dict[str, str]):
47
49
).strip ()
48
50
49
51
50
- def get_env (s , sep = "=" ):
51
- ret = {}
52
+ def get_env (s : str , sep : str = "=" ) -> typing .Iterable [tuple [str , str ]]:
52
53
for m in re .finditer (rf"(.*?){ sep } (.*)" , s , re .M ):
53
- ret .setdefault (* m .groups ())
54
- return ret
54
+ yield m .groups ()
55
55
56
56
57
57
def git (* args , ** kwargs ):
58
- return subprocess .run (
58
+ proc = subprocess .run (
59
59
("git" ,) + args , stdout = subprocess .PIPE , text = True , cwd = source_dir , ** kwargs
60
- ).stdout .strip ()
60
+ )
61
+ return proc .stdout .strip () if proc .returncode == 0 else None
62
+
61
63
64
+ endpoint_re = re .compile (r"^Endpoint(?: \((.*)\))?$" )
62
65
63
- def get_endpoint ():
64
- lfs_env_items = iter (
65
- get_env (
66
- subprocess . check_output ([ "git" , "lfs" , "env" ], text = True , cwd = source_dir )
67
- ). items ( )
66
+
67
+ def get_endpoint_addresses () -> typing . Iterable [ Endpoint ]:
68
+ """Get all lfs endpoints, including SSH if present"""
69
+ lfs_env_items = get_env (
70
+ subprocess . check_output ([ "git" , "lfs" , "env" ], text = True , cwd = source_dir )
68
71
)
69
- endpoint = next (v for k , v in lfs_env_items if k .startswith ("Endpoint" ))
70
- endpoint , _ , _ = endpoint .partition (" " )
71
- # only take the ssh endpoint if it follows directly after the first endpoint we found
72
- # in a situation like
73
- # Endpoint (a)=...
74
- # Endpoint (b)=...
75
- # SSH=...
76
- # we want to ignore the SSH endpoint, as it's not linked to the default (a) endpoint
77
- following_key , following_value = next (lfs_env_items , (None , None ))
78
- ssh_endpoint = following_value if following_key == " SSH" else None
79
-
80
- endpoint = Endpoint (
81
- endpoint ,
82
- {
72
+ current_endpoint = None
73
+ for k , v in lfs_env_items :
74
+ m = endpoint_re .match (k )
75
+ if m :
76
+ if current_endpoint :
77
+ yield current_endpoint
78
+ href , _ , _ = v .partition (" " )
79
+ current_endpoint = Endpoint (name = m [1 ] or "default" , href = href )
80
+ elif k == " SSH" and current_endpoint :
81
+ current_endpoint .ssh = v
82
+ if current_endpoint :
83
+ yield current_endpoint
84
+
85
+
86
+ def get_endpoints () -> typing .Iterable [Endpoint ]:
87
+ for endpoint in get_endpoint_addresses ():
88
+ endpoint .headers = {
83
89
"Content-Type" : "application/vnd.git-lfs+json" ,
84
90
"Accept" : "application/vnd.git-lfs+json" ,
85
- },
86
- )
87
- if ssh_endpoint :
88
- # see https://github.com/git-lfs/git-lfs/blob/main/docs/api/authentication.md
89
- server , _ , path = ssh_endpoint .partition (":" )
90
- ssh_command = shutil .which (
91
- os .environ .get ("GIT_SSH" , os .environ .get ("GIT_SSH_COMMAND" , "ssh" ))
92
- )
93
- assert ssh_command , "no ssh command found"
94
- resp = json .loads (
95
- subprocess .check_output (
96
- [
97
- ssh_command ,
98
- "-oStrictHostKeyChecking=accept-new" ,
99
- server ,
100
- "git-lfs-authenticate" ,
101
- path ,
102
- "download" ,
103
- ]
91
+ }
92
+ if endpoint .ssh :
93
+ # see https://github.com/git-lfs/git-lfs/blob/main/docs/api/authentication.md
94
+ server , _ , path = endpoint .ssh .partition (":" )
95
+ ssh_command = shutil .which (
96
+ os .environ .get ("GIT_SSH" , os .environ .get ("GIT_SSH_COMMAND" , "ssh" ))
104
97
)
105
- )
106
- endpoint .href = resp .get ("href" , endpoint )
107
- endpoint .update_headers (resp .get ("header" , {}))
108
- url = urlparse (endpoint .href )
109
- # this is how actions/checkout persist credentials
110
- # see https://github.com/actions/checkout/blob/44c2b7a8a4ea60a981eaca3cf939b5f4305c123b/src/git-auth-helper.ts#L56-L63
111
- auth = git ("config" , f"http.{ url .scheme } ://{ url .netloc } /.extraheader" )
112
- endpoint .update_headers (get_env (auth , sep = ": " ))
113
- if os .environ .get ("GITHUB_TOKEN" ):
114
- endpoint .headers ["Authorization" ] = f"token { os .environ ['GITHUB_TOKEN' ]} "
115
- if "Authorization" not in endpoint .headers :
116
- # last chance: use git credentials (possibly backed by a credential helper like the one installed by gh)
117
- # see https://git-scm.com/docs/git-credential
118
- credentials = get_env (
119
- git (
98
+ assert ssh_command , "no ssh command found"
99
+ cmd = [
100
+ ssh_command ,
101
+ "-oStrictHostKeyChecking=accept-new" ,
102
+ server ,
103
+ "git-lfs-authenticate" ,
104
+ path ,
105
+ "download" ,
106
+ ]
107
+ try :
108
+ res = subprocess .run (cmd , stdout = subprocess .PIPE , timeout = 15 )
109
+ except subprocess .TimeoutExpired :
110
+ print (
111
+ f"WARNING: ssh timed out when connecting to { server } , ignoring { endpoint .name } endpoint" ,
112
+ file = sys .stderr ,
113
+ )
114
+ continue
115
+ if res .returncode != 0 :
116
+ print (
117
+ f"WARNING: ssh failed when connecting to { server } , ignoring { endpoint .name } endpoint" ,
118
+ file = sys .stderr ,
119
+ )
120
+ continue
121
+ ssh_resp = json .loads (res .stdout )
122
+ endpoint .href = ssh_resp .get ("href" , endpoint )
123
+ endpoint .update_headers (ssh_resp .get ("header" , {}).items ())
124
+ url = urlparse (endpoint .href )
125
+ # this is how actions/checkout persist credentials
126
+ # see https://github.com/actions/checkout/blob/44c2b7a8a4ea60a981eaca3cf939b5f4305c123b/src/git-auth-helper.ts#L56-L63
127
+ auth = git ("config" , f"http.{ url .scheme } ://{ url .netloc } /.extraheader" ) or ""
128
+ endpoint .update_headers (get_env (auth , sep = ": " ))
129
+ if os .environ .get ("GITHUB_TOKEN" ):
130
+ endpoint .headers ["Authorization" ] = f"token { os .environ ['GITHUB_TOKEN' ]} "
131
+ if "Authorization" not in endpoint .headers :
132
+ # last chance: use git credentials (possibly backed by a credential helper like the one installed by gh)
133
+ # see https://git-scm.com/docs/git-credential
134
+ credentials = git (
120
135
"credential" ,
121
136
"fill" ,
122
137
check = True ,
123
138
# drop leading / from url.path
124
139
input = f"protocol={ url .scheme } \n host={ url .netloc } \n path={ url .path [1 :]} \n " ,
125
140
)
126
- )
127
- auth = base64 .b64encode (
128
- f'{ credentials ["username" ]} :{ credentials ["password" ]} ' .encode ()
129
- ).decode ("ascii" )
130
- endpoint .headers ["Authorization" ] = f"Basic { auth } "
131
- return endpoint
141
+ if credentials is None :
142
+ print (
143
+ f"WARNING: no authorization method found, ignoring { data .name } endpoint" ,
144
+ file = sys .stderr ,
145
+ )
146
+ continue
147
+ credentials = dict (get_env (credentials ))
148
+ auth = base64 .b64encode (
149
+ f'{ credentials ["username" ]} :{ credentials ["password" ]} ' .encode ()
150
+ ).decode ("ascii" )
151
+ endpoint .headers ["Authorization" ] = f"Basic { auth } "
152
+ yield endpoint
132
153
133
154
134
155
# see https://github.com/git-lfs/git-lfs/blob/310d1b4a7d01e8d9d884447df4635c7a9c7642c2/docs/api/basic-transfers.md
@@ -142,26 +163,31 @@ def get_locations(objects):
142
163
for i in indexes :
143
164
ret [i ] = objects [i ]["oid" ]
144
165
return ret
145
- endpoint = get_endpoint ()
146
166
data = {
147
167
"operation" : "download" ,
148
168
"transfers" : ["basic" ],
149
169
"objects" : [objects [i ] for i in indexes ],
150
170
"hash_algo" : "sha256" ,
151
171
}
152
- req = urllib .request .Request (
153
- f"{ endpoint .href } /objects/batch" ,
154
- headers = endpoint .headers ,
155
- data = json .dumps (data ).encode ("ascii" ),
156
- )
157
- with urllib .request .urlopen (req ) as resp :
158
- data = json .load (resp )
159
- assert len (data ["objects" ]) == len (
160
- indexes
161
- ), f"received { len (data )} objects, expected { len (indexes )} "
162
- for i , resp in zip (indexes , data ["objects" ]):
163
- ret [i ] = f'{ resp ["oid" ]} { resp ["actions" ]["download" ]["href" ]} '
164
- return ret
172
+ for endpoint in get_endpoints ():
173
+ req = urllib .request .Request (
174
+ f"{ endpoint .href } /objects/batch" ,
175
+ headers = endpoint .headers ,
176
+ data = json .dumps (data ).encode ("ascii" ),
177
+ )
178
+ try :
179
+ with urllib .request .urlopen (req ) as resp :
180
+ data = json .load (resp )
181
+ except urllib .request .HTTPError as e :
182
+ print (f"WARNING: encountered HTTPError { e } , ignoring endpoint { e .name } " )
183
+ continue
184
+ assert len (data ["objects" ]) == len (
185
+ indexes
186
+ ), f"received { len (data )} objects, expected { len (indexes )} "
187
+ for i , resp in zip (indexes , data ["objects" ]):
188
+ ret [i ] = f'{ resp ["oid" ]} { resp ["actions" ]["download" ]["href" ]} '
189
+ return ret
190
+ raise Exception (f"no valid endpoint found" )
165
191
166
192
167
193
def get_lfs_object (path ):
@@ -171,7 +197,7 @@ def get_lfs_object(path):
171
197
sha256 = size = None
172
198
if lfs_header != actual_header :
173
199
return None
174
- data = get_env (fileobj .read ().decode ("ascii" ), sep = " " )
200
+ data = dict ( get_env (fileobj .read ().decode ("ascii" ), sep = " " ) )
175
201
assert data ["oid" ].startswith ("sha256:" ), f"unknown oid type: { data ['oid' ]} "
176
202
_ , _ , sha256 = data ["oid" ].partition (":" )
177
203
size = int (data ["size" ])
0 commit comments