-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_archive.py
More file actions
132 lines (104 loc) · 3.34 KB
/
github_archive.py
File metadata and controls
132 lines (104 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# written mostly by chatgpt
"""Bulk-export every repository of a GitHub organization via the migrations API."""
import datetime
import os
import time
from pathlib import Path

import requests
from tqdm import tqdm

# Organization whose repositories get archived.
ORG = "Amulet-Team"
# Local directory that receives one <repo>.zip per repository.
OUTPUT_DIR = Path("github-backup")
# Seconds to wait between migration-status polls.
POLL_INTERVAL = 10

# A personal access token is mandatory for the migrations endpoints.
TOKEN = os.environ.get("GITHUB_TOKEN")
if not TOKEN:
    raise RuntimeError("Set GITHUB_TOKEN environment variable")

# Common headers for every GitHub REST call.
HEADERS = {
    "Authorization": f"Bearer {TOKEN}",
    "Accept": "application/vnd.github+json",
}

OUTPUT_DIR.mkdir(exist_ok=True)
def gh_get(url):
    """GET *url* from the GitHub API and return the decoded JSON body.

    Raises:
        requests.HTTPError: on any non-2xx response.
        requests.Timeout: if the server does not respond within 60 s.
    """
    # A timeout prevents the whole backup run from hanging forever on a
    # stalled connection (requests waits indefinitely by default).
    r = requests.get(url, headers=HEADERS, timeout=60)
    r.raise_for_status()
    return r.json()
def gh_post(url, payload):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Raises:
        requests.HTTPError: on any non-2xx response.
        requests.Timeout: if the server does not respond within 60 s.
    """
    # Bounded timeout so a dead endpoint cannot stall the script indefinitely.
    r = requests.post(url, headers=HEADERS, json=payload, timeout=60)
    r.raise_for_status()
    return r.json()
def download_file(url, path: Path):
    """Stream *url* to *path* with a tqdm progress bar.

    The file is first written to ``<path>.part`` and renamed into place only
    after the download completes, so an interrupted run never leaves a
    truncated file at *path* (main() skips repos whose .zip already exists,
    so a partial file there would silently be treated as a finished backup).

    Raises:
        requests.HTTPError: on any non-2xx response.
    """
    tmp = path.with_suffix(path.suffix + ".part")
    # stream=True avoids holding the whole archive in memory; the timeout
    # bounds the connect and per-read waits, not the total download time.
    with requests.get(url, headers=HEADERS, stream=True, timeout=60) as r:
        r.raise_for_status()
        # Content-Length may be absent; 0 gives tqdm an indeterminate bar.
        total = int(r.headers.get("content-length", 0))
        with open(tmp, "wb") as f, tqdm(
            total=total, unit="B", unit_scale=True, desc=path.name
        ) as loading_bar:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
                    loading_bar.update(len(chunk))
    # Atomic on POSIX: only a fully-downloaded file ever appears at *path*.
    tmp.replace(path)
# ----------------------------
# Get all repos
# ----------------------------
def get_all_repos(org):
    """Return every repository object of *org*, walking the paginated API."""
    collected = []
    page_number = 1
    while True:
        page_url = (
            f"https://api.github.com/orgs/{org}/repos"
            f"?per_page=100&page={page_number}"
        )
        batch = gh_get(page_url)
        # An empty page means we have walked past the last repository.
        if not batch:
            return collected
        collected.extend(batch)
        page_number += 1
# ----------------------------
# Start migration
# ----------------------------
def start_migration(org, full_repo_name):
    """Start an org migration that exports one repository; return its id."""
    endpoint = f"https://api.github.com/orgs/{org}/migrations"
    request_body = {
        "repositories": [full_repo_name],
        "lock_repositories": False,  # keep the repo usable while exporting
        "exclude_attachments": False,
    }
    response = gh_post(endpoint, request_body)
    return response["id"]
# ----------------------------
# Wait for migration
# ----------------------------
def wait_for_export(org, migration_id):
    """Block until the migration reaches the ``exported`` state.

    Polls every POLL_INTERVAL seconds and raises RuntimeError if the
    migration enters the ``failed`` state.
    """
    status_url = f"https://api.github.com/orgs/{org}/migrations/{migration_id}"
    state = None
    while state != "exported":
        state = gh_get(status_url)["state"]
        print(f"Migration {migration_id}: {state}")
        if state == "failed":
            raise RuntimeError(f"Migration failed: {migration_id}")
        if state != "exported":
            # Still pending/exporting — wait before asking again.
            time.sleep(POLL_INTERVAL)
# ----------------------------
# Download archive
# ----------------------------
def download_archive(org, migration_id, repo_name):
    """Fetch the finished migration archive into OUTPUT_DIR/<repo_name>.zip."""
    archive_url = (
        f"https://api.github.com/orgs/{org}/migrations/{migration_id}/archive"
    )
    download_file(archive_url, OUTPUT_DIR / f"{repo_name}.zip")
# ----------------------------
# Main
# ----------------------------
def main():
    """Export and download an archive of every repository in ORG.

    Repositories whose .zip already exists in OUTPUT_DIR are skipped, so the
    script can be re-run to resume an interrupted backup.
    """
    print(f"Fetching repos for {ORG}...")
    for repo in get_all_repos(ORG):
        full_name = repo["full_name"]
        short_name = repo["name"]
        target = OUTPUT_DIR / f"{short_name}.zip"
        if target.exists():
            print(f"Skipping {full_name} (already downloaded)")
            continue
        print(f"\n=== Exporting {full_name} ===")
        migration_id = start_migration(ORG, full_name)
        print(f"Migration started: {migration_id}")
        wait_for_export(ORG, migration_id)
        download_archive(ORG, migration_id, short_name)
    print("\nAll done.")


if __name__ == "__main__":
    main()