-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_archive.py
More file actions
132 lines (104 loc) · 3.34 KB
/
github_archive.py
File metadata and controls
132 lines (104 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# written mostly by chatgpt
"""Bulk-export every repository of a GitHub organization via the migrations API."""
import datetime
import os
import time
from pathlib import Path

import requests
from tqdm import tqdm

# Organization whose repositories get archived.
ORG = "Amulet-Team"
# Local directory that receives one <repo>.zip per repository.
OUTPUT_DIR = Path("github-backup")
# Seconds to wait between migration-status polls.
POLL_INTERVAL = 10

# A personal access token is mandatory for the migrations endpoints.
TOKEN = os.environ.get("GITHUB_TOKEN")
if not TOKEN:
    raise RuntimeError("Set GITHUB_TOKEN environment variable")

# Common headers for every GitHub REST call.
HEADERS = {
    "Authorization": f"Bearer {TOKEN}",
    "Accept": "application/vnd.github+json",
}

OUTPUT_DIR.mkdir(exist_ok=True)
def gh_get(url):
    """GET *url* from the GitHub API and return the decoded JSON body.

    Raises:
        requests.HTTPError: on any non-2xx response.
        requests.Timeout: if the server does not respond within 60 s.
    """
    # A timeout prevents the whole backup run from hanging forever on a
    # stalled connection (requests waits indefinitely by default).
    r = requests.get(url, headers=HEADERS, timeout=60)
    r.raise_for_status()
    return r.json()
def gh_post(url, payload):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Raises:
        requests.HTTPError: on any non-2xx response.
        requests.Timeout: if the server does not respond within 60 s.
    """
    # Bounded timeout so a dead endpoint cannot stall the script indefinitely.
    r = requests.post(url, headers=HEADERS, json=payload, timeout=60)
    r.raise_for_status()
    return r.json()
def download_file(url, path: Path):
    """Stream *url* to *path* with a tqdm progress bar.

    The file is first written to ``<path>.part`` and renamed into place only
    after the download completes, so an interrupted run never leaves a
    truncated file at *path* (main() skips repos whose .zip already exists,
    so a partial file there would silently be treated as a finished backup).

    Raises:
        requests.HTTPError: on any non-2xx response.
    """
    tmp = path.with_suffix(path.suffix + ".part")
    # stream=True avoids holding the whole archive in memory; the timeout
    # bounds the connect and per-read waits, not the total download time.
    with requests.get(url, headers=HEADERS, stream=True, timeout=60) as r:
        r.raise_for_status()
        # Content-Length may be absent; 0 gives tqdm an indeterminate bar.
        total = int(r.headers.get("content-length", 0))
        with open(tmp, "wb") as f, tqdm(
            total=total, unit="B", unit_scale=True, desc=path.name
        ) as loading_bar:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
                    loading_bar.update(len(chunk))
    # Atomic on POSIX: only a fully-downloaded file ever appears at *path*.
    tmp.replace(path)
# ----------------------------
# Get all repos
# ----------------------------
def get_all_repos(org):
    """Return every repository object of *org*, walking the paginated API."""
    collected = []
    page_number = 1
    while True:
        page_url = (
            f"https://api.github.com/orgs/{org}/repos"
            f"?per_page=100&page={page_number}"
        )
        batch = gh_get(page_url)
        # An empty page means we have walked past the last repository.
        if not batch:
            return collected
        collected.extend(batch)
        page_number += 1
# ----------------------------
# Start migration
# ----------------------------
def start_migration(org, full_repo_name):
    """Start an org migration that exports one repository; return its id."""
    endpoint = f"https://api.github.com/orgs/{org}/migrations"
    request_body = {
        "repositories": [full_repo_name],
        "lock_repositories": False,  # keep the repo usable while exporting
        "exclude_attachments": False,
    }
    response = gh_post(endpoint, request_body)
    return response["id"]
# ----------------------------
# Wait for migration
# ----------------------------
def wait_for_export(org, migration_id):
    """Block until the migration reaches the ``exported`` state.

    Polls every POLL_INTERVAL seconds and raises RuntimeError if the
    migration enters the ``failed`` state.
    """
    status_url = f"https://api.github.com/orgs/{org}/migrations/{migration_id}"
    state = None
    while state != "exported":
        state = gh_get(status_url)["state"]
        print(f"Migration {migration_id}: {state}")
        if state == "failed":
            raise RuntimeError(f"Migration failed: {migration_id}")
        if state != "exported":
            # Still pending/exporting — wait before asking again.
            time.sleep(POLL_INTERVAL)
# ----------------------------
# Download archive
# ----------------------------
def download_archive(org, migration_id, repo_name):
    """Fetch the finished migration archive into OUTPUT_DIR/<repo_name>.zip."""
    archive_url = (
        f"https://api.github.com/orgs/{org}/migrations/{migration_id}/archive"
    )
    download_file(archive_url, OUTPUT_DIR / f"{repo_name}.zip")
# ----------------------------
# Main
# ----------------------------
def main():
    """Export and download an archive of every repository in ORG.

    Repositories whose .zip already exists in OUTPUT_DIR are skipped, so the
    script can be re-run to resume an interrupted backup.
    """
    print(f"Fetching repos for {ORG}...")
    for repo in get_all_repos(ORG):
        full_name = repo["full_name"]
        short_name = repo["name"]
        target = OUTPUT_DIR / f"{short_name}.zip"
        if target.exists():
            print(f"Skipping {full_name} (already downloaded)")
            continue
        print(f"\n=== Exporting {full_name} ===")
        migration_id = start_migration(ORG, full_name)
        print(f"Migration started: {migration_id}")
        wait_for_export(ORG, migration_id)
        download_archive(ORG, migration_id, short_name)
    print("\nAll done.")


if __name__ == "__main__":
    main()