Skip to content

Commit 3acd27e

Browse files
committed
cache changelog
1 parent 8ad6bcb commit 3acd27e

File tree

3 files changed

+184
-0
lines changed

3 files changed

+184
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Downloads the Debian changelogs of all Garden Linux packages and keeps them
# in the Actions cache so later runs can skip the download.
name: Caching Changelogs

on: push

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5

      # Restore previously downloaded changelogs.
      # The key has no hash or date component, so once an entry exists for this
      # OS it is always an exact hit and the download step below is skipped.
      - name: Cache Changelogs
        id: cache-Changelogs
        uses: actions/cache@v4
        with:
          path: parse-changelog/changelogs
          key: ${{ runner.os }}-Changelogs

      # NOTE(review): the script also imports `requests`, which is not installed
      # here; this presumably relies on it being preinstalled on the runner - confirm.
      - run: pip install --break-system-packages python-debian

      # Only populate the changelogs directory when the cache did not provide it.
      - name: Download Changelogs
        if: steps.cache-Changelogs.outputs.cache-hit != 'true'
        run: python3 download_changelog_files.py
        working-directory: parse-changelog

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ few-cve.jsonl
44
glibc-minor-version-issue/json
55
**/.venv
66
glvd-postgres-with-data copy/glvd-dump-10.sql
7+
parse-changelog/changelogs
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import logging
2+
from debian import changelog
3+
import requests
4+
import lzma
5+
import tarfile
6+
import io
7+
import re
8+
import json
9+
import gzip
10+
import os
11+
import shutil
12+
import tempfile
13+
14+
# Configure the root logger once, when the module is loaded: DEBUG verbosity
# with timestamped, level-tagged lines.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.DEBUG,
)
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
20+
21+
22+
def download_apt_index_files():
    """Download the APT source index of each release and collect its changelogs.

    Reads ``releases-patch.json``, keeps the releases whose
    ``attributes.source_repo`` is ``True``, and for each ``major.minor``
    version downloads ``Sources.gz``, writes the decompressed index into
    ``./lists`` and passes it to ``download_changelogs``.

    The function is best-effort: every failure is logged and the affected
    version is skipped. If the release list itself cannot be fetched, an
    error is logged and nothing is downloaded.
    """
    # Without a timeout requests waits indefinitely on a stalled connection.
    request_timeout = 60  # seconds

    # Download releases-patch.json and extract versions with source_repo==true
    releases_url = "https://gardenlinux-glrd.s3.eu-central-1.amazonaws.com/releases-patch.json"
    try:
        resp = requests.get(releases_url, timeout=request_timeout)
        resp.raise_for_status()
        releases_data = resp.json()
    except Exception as e:
        logger.error(f"Failed to download or parse releases-patch.json: {e}")
        releases_data = {"releases": []}

    versions = [
        f"{r['version']['major']}.{r['version']['minor']}"
        for r in releases_data.get("releases", [])
        if r.get("attributes", {}).get("source_repo") is True
    ]

    output_dir = "./lists"
    os.makedirs(output_dir, exist_ok=True)

    for version in versions:
        sources_url = f"https://packages.gardenlinux.io/gardenlinux/dists/{version}/main/source/Sources.gz"
        output_filename = os.path.join(
            output_dir,
            f"packages.gardenlinux.io_gardenlinux_dists_{version}_main_source_Sources"
        )
        logger.info(f"Downloading {sources_url}")
        try:
            resp = requests.get(sources_url, timeout=request_timeout)
            resp.raise_for_status()
            # Decompress straight from the downloaded bytes; no temporary file
            # is created, so nothing is left behind when decompression fails.
            with gzip.open(io.BytesIO(resp.content), "rb") as gz_in, open(output_filename, "wb") as out_f:
                shutil.copyfileobj(gz_in, out_f)
        except Exception as e:
            logger.error(f"Failed to download or extract {sources_url}: {e}")
            continue
        logger.info(f"Wrote {output_filename}")

        # Handled separately so that a changelog failure is not reported as a
        # failure to download the Sources index.
        try:
            download_changelogs(output_filename, version)
        except Exception as e:
            logger.error(f"Failed to download changelogs for version {version}: {e}")
64+
65+
66+
def parse_debian_apt_source_index_file(file_path):
    """Parse a decompressed APT ``Sources`` index into a list of entries.

    The file is split into stanzas on blank lines. From each stanza the
    ``Format``, ``Directory`` and ``Package`` fields and the indented lines
    of the ``Files`` section are collected.

    Args:
        file_path: Path of the decompressed, UTF-8 encoded Sources file.

    Returns:
        A list of dicts with the keys ``'Format'``, ``'Directory'``,
        ``'Files'`` (list of stripped lines) and ``'Package'``. Stanzas for
        the ``linux`` package and stanzas without a ``Package`` field are
        left out.

    Raises:
        Exception: whatever opening or reading the file raises; the error is
            logged and re-raised unchanged.
    """
    logger.info(f"Parsing Debian APT source index file: {file_path}")
    try:
        # Read explicitly as UTF-8 rather than depending on the process locale.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        logger.error(f"Failed to read file {file_path}: {e}")
        raise

    results = []

    for entry in re.split(r'\n\s*\n', content.strip()):
        if not entry.strip():
            # An empty file yields a single empty stanza; nothing to parse.
            continue

        # Reset all per-stanza state so no value leaks in from the previous stanza.
        format_ = None
        directory = None
        package = None
        files = []
        in_files_section = False

        for line in entry.strip().split('\n'):
            if line.startswith((' ', '\t')):
                # Continuation line: only those below "Files:" are of interest.
                if in_files_section and line.strip():
                    files.append(line.strip())
                continue

            # Any non-indented line starts a new field and ends a Files section.
            in_files_section = line.startswith('Files:')
            if line.startswith('Format:'):
                format_ = line.split(':', 1)[1].strip()
            elif line.startswith('Directory:'):
                directory = line.split(':', 1)[1].strip()
            elif line.startswith('Package:'):
                package = line.split(':', 1)[1].strip()

        if package is None:
            logger.warning("Skipping source index entry without a Package field")
            continue

        # We have special handling for the kernel because we don't use debian's build for that
        if package != 'linux':
            results.append({
                'Format': format_,
                'Directory': directory,
                'Files': files,
                'Package': package
            })

    logger.info(f"Parsed {len(results)} entries from source index file")
    return results
113+
114+
def download_changelogs(sources_path, gl_version):
    """Download and store ``debian/changelog`` for each package in a Sources index.

    For every entry in the index with format ``3.0 (quilt)`` that lists a
    ``debian.tar.xz`` file, the archive is downloaded, decompressed, and its
    ``debian/changelog`` member is written to
    ``changelogs/<gl_version>/<package>_changelog.txt`` relative to the
    current working directory. Per-package failures are logged and skipped.

    Args:
        sources_path: Path of the decompressed APT Sources index file.
        gl_version: Version string used as the output sub-directory name.

    Raises:
        Exception: whatever ``parse_debian_apt_source_index_file`` raises
            when the index file cannot be read.
    """
    logger.info(f"Using apt sources file from {sources_path}")

    parsed_entries = parse_debian_apt_source_index_file(sources_path)
    logger.info(f"Found {len(parsed_entries)} entries in source index file")

    changelog_dir = f"changelogs/{gl_version}"

    for entry in parsed_entries:
        logger.info(f"Processing entry: {entry.get('Package', 'unknown')}")
        if entry['Format'] != "3.0 (quilt)":
            continue

        # The file name is the last whitespace-separated token of the line;
        # split() tolerates tabs and repeated spaces between the columns.
        debian_tar_xz_file = next(
            (f.split()[-1] for f in entry['Files'] if f.endswith('debian.tar.xz')),
            None,
        )
        if debian_tar_xz_file is None:
            continue

        url = f"https://packages.gardenlinux.io/gardenlinux/{entry['Directory']}/{debian_tar_xz_file}"
        logger.info(f"Downloading debian.tar.xz from {url}")
        try:
            # Without a timeout requests waits indefinitely on a stalled connection.
            response = requests.get(url, timeout=60)
            response.raise_for_status()
        except Exception as e:
            logger.error(f"Failed to download {url}: {e}")
            continue

        try:
            decompressed = lzma.decompress(response.content)
        except Exception as e:
            logger.error(f"Failed to decompress xz file for {entry['Package']}: {e}")
            continue

        try:
            with tarfile.open(fileobj=io.BytesIO(decompressed)) as tar:
                changelog_member = tar.getmember("debian/changelog")
                changelog_file = tar.extractfile(changelog_member)
                changelog_content = changelog_file.read().decode("utf-8")
            # The output directory is only created once a changelog is available.
            os.makedirs(changelog_dir, exist_ok=True)
            output_filename = f"{changelog_dir}/{entry['Package']}_changelog.txt"
            with open(output_filename, "w", encoding="utf-8") as out_f:
                out_f.write(changelog_content)
            logger.info(f"Wrote changelog to {output_filename}")
        except Exception as e:
            logger.error(f"Failed to extract or parse changelog for {entry['Package']}: {e}")
            continue
156+
157+
158+
# Script entry point: start the download of all source indexes and changelogs.
# There is no __main__ guard, so this also runs when the module is imported.
download_apt_index_files()

0 commit comments

Comments
 (0)