Skip to content

Commit 0de97ea

Browse files
committed
Initial commit
0 parents  commit 0de97ea

File tree

5 files changed

+271
-0
lines changed

5 files changed

+271
-0
lines changed

.github/workflows/main.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Refreshes the repository archives by running main.sh.
# Triggers: manual dispatch, a daily schedule, and pushes to main that
# modify list.txt.
name: Update Archives

on:
  workflow_dispatch:
    inputs:
      to_software_heritage:
        description: "Upload to Software Heritage"
        default: "true"
        required: false
  schedule:
    - cron: "0 5 * * *" # Every day at 05:00 UTC
  push:
    branches:
      - "main"
    paths:
      - "list.txt"

# Every run pushes to the same archive branch, so overlapping runs
# (e.g. a push landing during the scheduled run) are queued instead of
# racing each other.
concurrency:
  group: update-archives
  cancel-in-progress: false

jobs:
  update:
    permissions:
      contents: write
    name: Update Archives
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      # Only on manual runs: rewrite the SOFTWARE_HERITAGE default in main.sh
      # with the value chosen in the dispatch form. The value is handed over
      # through an environment variable rather than being expanded into the
      # script text, so it can never be interpreted as shell code.
      - name: Overwrite options
        if: github.event_name == 'workflow_dispatch' && (github.event.inputs.to_software_heritage == 'true' || github.event.inputs.to_software_heritage == 'false')
        env:
          TO_SOFTWARE_HERITAGE: ${{ github.event.inputs.to_software_heritage }}
        run: sed --in-place "s|^SOFTWARE_HERITAGE='.*'|SOFTWARE_HERITAGE='${TO_SOFTWARE_HERITAGE}'|g" main.sh

      - run: ./main.sh

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 r-jb
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# CodeStreisand
2+
3+
This project is for automatically archiving Git repositories using GitHub Actions. It creates a [bundle](https://git-scm.com/docs/git-bundle) for each repository in a list, and hosts them in a separate branch.
4+
5+
See this [demo](https://github.com/r-jb/CodeStreisand/tree/archive).
6+
7+
## Features
8+
9+
- Backup repositories to a GitHub repository
10+
- Automatically update the archived repos
11+
- Optionally submits the archived repositories to [Software Heritage](https://www.softwareheritage.org/)
12+
13+
## Usage
14+
15+
> [!TIP]
16+
> If you want to host your archives privately, you can also import this repo using [GitHub Importer](https://docs.github.com/en/migrations/importing-source-code/using-github-importer/importing-a-repository-with-github-importer#importing-a-repository-with-github-importer)
17+
18+
1. Fork this repository
19+
2. Edit [`list.txt`](list.txt) with the URLs of the repositories you want to archive, one per line
20+
3. Trigger a manual run by going to `Actions` -> `Update Archives` -> `Run workflow`
21+
4. (Optional) Change the update schedule in [`main.yml`](.github/workflows/main.yml)
22+
23+
> [!NOTE]
24+
> The results are stored in the `archive` branch by default

list.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Haier vs Andre0512
2+
https://github.com/Andre0512/hon
3+
https://github.com/Andre0512/pyhOn
4+
https://github.com/Andre0512/hon-test-data
5+
6+
# Misc
7+
https://github.com/yt-dlp/yt-dlp
8+
https://github.com/jsavargas/zspotify

main.sh

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
#!/usr/bin/env bash
2+
3+
ARCHIVE_BRANCH='archive'
4+
SOFTWARE_HERITAGE='true'
5+
6+
#######################################
# Prepare the checkout for archiving: configure the bot identity, switch to
# an orphan archive branch, empty it, and pull the existing archive.
# Globals:   ARCHIVE_BRANCH (read)
# Arguments: none
# Returns:   0, also when the archive branch could not be pulled
#######################################
init_git() {
  git config --global user.name 'github-actions[bot]'
  git config --global user.email 'github-actions[bot]@users.noreply.github.com'
  git checkout --orphan "$ARCHIVE_BRANCH"

  # The orphan branch starts with the source branch's files staged. Remove
  # them all in one call; the former per-file `find -exec git rm` already
  # removed everything on its first match (`.`), so every later invocation
  # could only fail.
  git rm -rf --quiet --ignore-unmatch -- .

  # Bring in the bundles and README.md published by earlier runs.
  if ! git pull origin "$ARCHIVE_BRANCH"; then
    echo "Could not pull '$ARCHIVE_BRANCH' from origin (expected on the first run, before the branch exists)" >&2
  fi
}
13+
14+
#######################################
# Check that a remote repository is reachable and has at least one branch.
# Arguments: $1 - repository URL
#            $2 - optional branch name; when given, that exact branch must
#                 exist (compared literally, not as a pattern)
# Returns:   0 if the repository (and branch, if requested) exists,
#            1 otherwise
#######################################
repo_exist_not_empty() {
  local url ref heads name
  url="$1"
  ref="${2:-}"

  # An unreachable repository makes ls-remote fail; an empty one lists nothing.
  heads="$(git ls-remote --quiet --heads "$url" 2>/dev/null)" || return 1
  [ -n "$heads" ] || return 1

  # Without a branch name, any head is enough.
  [ -n "$ref" ] || return 0

  # ls-remote prints "<sha>TAB<refname>"; compare the ref name exactly so that
  # "main" does not match "main-old" and regex metacharacters stay literal.
  while IFS=$'\t' read -r _ name; do
    if [ "$name" = "refs/heads/$ref" ]; then
      return 0
    fi
  done <<<"$heads"
  return 1
}
22+
23+
#######################################
# Tell whether a list entry is a comment line, i.e. its first
# non-whitespace character is '#'.
# Arguments: $1 - the line to inspect
# Returns:   0 if the line is a comment, 1 otherwise
#######################################
is_comment() {
  # POSIX character class: `\s` is not part of the ERE syntax used by
  # `[[ =~ ]]`, so it cannot be relied on to match leading whitespace.
  [[ "$1" =~ ^[[:space:]]*# ]]
}
30+
31+
#######################################
# Check whether a URL answers an HTTP GET with status 200.
# Redirects are not followed, so any other status (3xx included) counts as
# "does not exist".
# Arguments: $1 - URL to probe
# Returns:   0 if the status is 200, 1 otherwise (including curl failures)
#######################################
url_exist() {
  local http_code

  # Time limits keep one unresponsive host from stalling the whole run.
  http_code="$(curl --silent --output /dev/null \
    --connect-timeout 10 --max-time 60 \
    --write-out '%{http_code}' "$1")"

  [ "$http_code" = '200' ]
}
39+
40+
# Usage: add_to_readme <url> <name>
#
# Make sure README.md (in the current directory) exists and holds exactly one
# table row for the given repository.
#   - Creates README.md with the restore instructions and the table header
#     when the file is missing or empty.
#   - Appends a row for <url> unless one is already present.
# Row status: 🟩 repository reachable, 🟨 unreachable but a non-empty
# <name>.bundle exists, 🟥 neither (last update is then "never").
# Relies on url_exist for the reachability checks.
add_to_readme() {
  local repo_url repo_name software_heritage_md status last_update
  repo_url="$1"
  repo_name="$2"

  # If no readme
  if [ ! -s README.md ]; then
    cat >>README.md <<'EOF'
# CodeStreisand

<details><summary>How to restore</summary>

## General instructions

1. Clone the `archive` branch

```bash
git clone --branch archive https://github.com/your-username/your-repo codestreisand
```

2. Restore from bundle

```bash
git clone codestreisand/FILE.bundle
```

## Download only a specific backup

```bash
git clone --no-checkout --depth=1 --no-tags --branch archive https://github.com/your-username/your-repo codestreisand
git -C codestreisand restore --staged FILE.bundle
git -C codestreisand checkout FILE.bundle
git clone codestreisand/FILE.bundle
```

</details>

| Status | Name | Software Heritage | Last Update |
| - | - | - | - |
EOF
  fi

  # Already listed? Look for the literal link target "](<url>) |" so that a
  # URL which is a prefix of another one (".../hon" vs ".../hon-test-data")
  # is not mistaken for it, and so that dots in the URL are not wildcards.
  if grep --quiet --fixed-strings -- "]($repo_url) |" README.md; then
    return 0
  fi

  # Check Software Heritage: ask the archive itself whether it knows this
  # origin, rather than probing the repository URL.
  software_heritage_md='Not available'
  if url_exist "https://archive.softwareheritage.org/api/1/origin/$repo_url/get/"; then
    software_heritage_md="[Link](https://archive.softwareheritage.org/browse/origin/directory/?origin_url=$repo_url)"
  fi

  last_update="$(date '+%d/%m/%Y')"
  if url_exist "$repo_url"; then
    status='🟩'
  elif [ -s "$repo_name.bundle" ]; then
    status='🟨'
  else
    status='🟥'
    last_update='never'
  fi

  echo "| $status | [$repo_name]($repo_url) | $software_heritage_md | $last_update |" >>README.md
}
99+
100+
# Usage: update_repo_date <repo url>
#
# Set the "Last Update" column of the repository's row in README.md (current
# directory) to today's date (dd/mm/YYYY). All other lines are left as they
# are. The file is rewritten through README.md.temp.
update_repo_date() {
  local repo_url current_date
  repo_url="$1"
  current_date="$(date '+%d/%m/%Y')"

  # Rows look like "| status | [name](url) | heritage | date |", so with "|"
  # as separator the name cell is $3 and the date cell is $5. The row is
  # found by the literal text "](url) " so that a URL which is a prefix of
  # another one does not update both rows.
  awk -v url="$repo_url" -v date="$current_date" '
    BEGIN { FS = OFS = "|"; needle = "](" url ") " }
    index($3, needle) { $5 = " " date " " }
    1
  ' README.md >README.md.temp && mv -f README.md.temp README.md
}
108+
109+
# Usage: set_repo_status <repo url> <repo name>
#
# Refresh the status cell of the repository's row in README.md (current
# directory):
#   🟩 the repository URL is reachable (per url_exist),
#   🟨 it is not, but a non-empty "<repo name>.bundle" exists,
#   🟥 neither.
# Returns non-zero if README.md could not be rewritten.
set_repo_status() {
  local repo_url repo_name color
  repo_url="$1"
  repo_name="$2"

  if url_exist "$repo_url"; then
    color='🟩'
  elif [ -s "$repo_name.bundle" ]; then
    color='🟨'
  else
    color='🟥'
  fi

  # Rows look like "| status | [name](url) | heritage | date |": the status
  # cell is $2 and the name cell is $3. The row is found by the literal text
  # "](url) " so that a URL which is a prefix of another one does not touch
  # both rows.
  awk -v url="$repo_url" -v status="$color" '
    BEGIN { FS = OFS = "|"; needle = "](" url ") " }
    index($3, needle) { $2 = " " status " " }
    1
  ' README.md >README.md.temp && mv -f README.md.temp README.md || return 1

  # Debug aid: dump the README when processing the repository named
  # 'test-repo'. Written as an `if` so it no longer turns the function's
  # exit status into a failure for every other repository.
  if [ "$repo_name" = 'test-repo' ]; then
    cat README.md
  fi
}
127+
128+
# Usage: commit_and_push <repo name>
#
# Stage README.md and "<repo name>.bundle", commit them as
# "Update <repo name>" and push to the archive branch (ARCHIVE_BRANCH).
# Output of every step except staging README.md is discarded; the exit
# status is that of the push.
commit_and_push() {
  local name
  name="$1"

  git add README.md

  # Bundle staging, commit and push run silently.
  {
    git add "${name}.bundle"
    git commit --message="Update ${name}"
    git push origin "${ARCHIVE_BRANCH}"
  } >/dev/null 2>&1
}
138+
139+
# Main flow: archive every repository listed in list.txt.
#
# list.txt is read into memory first because init_git switches to the archive
# branch and removes the files of the branch the script was started from.
list="$(cat list.txt)"
init_git
while IFS= read -r entry; do
  # Strip surrounding whitespace (including a trailing CR from CRLF files) so
  # padded or Windows-edited lines still give a valid URL and bundle name, and
  # whitespace-only lines are skipped like empty ones.
  entry="${entry#"${entry%%[![:space:]]*}"}"
  entry="${entry%"${entry##*[![:space:]]}"}"

  if [ -n "$entry" ] && ! is_comment "$entry"; then
    repo_name="$(basename "$entry")"
    echo -e "\n\n---------------------------- Archiving ${repo_name}... ----------------------------\n\n"

    # Save the current bundle hash
    current_hash=''
    if [ -s "$repo_name.bundle" ]; then
      current_hash="$(sha256sum "$repo_name.bundle" | awk '{print $1}')"
    fi

    # Create a bundle (only when the remote is reachable and has branches;
    # otherwise a previously archived bundle is kept untouched)
    if repo_exist_not_empty "$entry"; then
      git clone --mirror --recursive -j8 "$entry" "$repo_name"
      git -C "$repo_name" bundle create "../$repo_name.bundle" --all
      rm -rf "$repo_name"
    fi

    add_to_readme "$entry" "$repo_name"
    set_repo_status "$entry" "$repo_name"

    # Save the new bundle hash ('default_value' means there is no bundle)
    new_hash='default_value'
    if [ -s "$repo_name.bundle" ]; then
      new_hash="$(sha256sum "$repo_name.bundle" | awk '{print $1}')"
    fi

    # If the bundle changed
    if [ "$new_hash" != "$current_hash" ]; then

      # If the bundle was updated
      if [ "$new_hash" != 'default_value' ]; then
        update_repo_date "$entry"
      fi

      # Post to Software Heritage
      if [ "$SOFTWARE_HERITAGE" = 'true' ]; then
        # --silent/--show-error: keep curl's progress meter out of the log
        # while still reporting transfer errors.
        response="$(curl --silent --show-error --request POST "https://archive.softwareheritage.org/api/1/origin/save/git/url/$entry/" | jq --raw-output .save_request_status)"
        echo "Software Heritage: $response"
      fi
    fi
    commit_and_push "$repo_name"
  fi
done <<<"$list"

0 commit comments

Comments
 (0)