Skip to content

Commit 7170cff

Browse files
committed
test merger
1 parent 5104b32 commit 7170cff

File tree

9 files changed

+456
-1
lines changed

9 files changed

+456
-1
lines changed

.github/workflows/sync.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,31 +28,37 @@ jobs:
2828
source: "certstream"
2929
dest: "ghcr"
3030
script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/sync_certstream_ghcr.sh"
31+
merge-script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_certstream_ghcr.sh"
3132

3233
- runner: "ubuntu-latest"
3334
source: "certstream"
3435
dest: "hf"
3536
script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/sync_certstream_hf.sh"
37+
merge-script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_certstream_hf.sh"
3638

3739
- runner: "ubuntu-latest"
3840
source: "trickest"
3941
dest: "ghcr"
4042
script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/sync_trickest_ghcr.sh"
43+
merge-script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_trickest_ghcr.sh"
4144

4245
- runner: "ubuntu-latest"
4346
source: "trickest"
4447
dest: "hf"
4548
script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/sync_trickest_hf.sh"
49+
merge-script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_trickest_hf.sh"
4650

4751
- runner: "ubuntu-latest"
4852
source: "sni-ip-ranges"
4953
dest: "ghcr"
5054
script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/sync_sni-ip-ranges_ghcr.sh"
55+
merge-script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_sni-ip-ranges_ghcr.sh"
5156

5257
- runner: "ubuntu-latest"
5358
source: "sni-ip-ranges"
5459
dest: "hf"
5560
script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/sync_sni-ip-ranges_hf.sh"
61+
merge-script: "https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_sni-ip-ranges_hf.sh"
5662

5763
steps:
5864
- name: Install Addons
@@ -144,4 +150,24 @@ jobs:
144150
/tmp/DATA/**
145151
${{ env.ARTIFACTS_DIR }}/**
146152
show-summary: true
153+
continue-on-error: true
154+
155+
- name: Merge [${{ matrix.source }} ==> ${{ matrix.dest }}]
  # Runs only when an earlier step has set MERGE_DATA=YES in the job environment.
  if: env.MERGE_DATA == 'YES'
  env:
    GHCR_TOKEN: "${{ github.token }}"
    GITHUB_TOKEN: "${{ github.token }}"
    #GITHUB_TOKEN: "${{ secrets.RO_GHTOKEN }}"
    HF_TOKEN: "${{ secrets.HF_TOKEN }}"
    # Passed through the environment so the expression is never pasted into the script body.
    MERGE_SCRIPT_URL: "${{ matrix.merge-script }}"
  run: |
    #Presets
    set +x ; set +e
    #--------------#
    #Free space: drop files, then whole directories, bigger than 10 MiB under ${SYSTMP}
    find "${SYSTMP}" -type f -size +10M -exec rm -rf "{}" \; 2>/dev/null
    #The path is handed to bash as $1 (not spliced into the script text), so odd file names cannot inject code
    find "${SYSTMP}" -type d -exec bash -c 'test "$(du -sb "$1" | cut -f1 | tr -d "[:space:]")" -gt 10485760 && rm -rf "$1"' _ "{}" \; 2>/dev/null
    #Remove any ./sync.sh left behind earlier so a failed download can never execute a stale script
    rm -f "./sync.sh"
    if ! curl -qfsSL "${MERGE_SCRIPT_URL}" -o "./sync.sh"; then
      echo "[-] FATAL: Failed to download ${MERGE_SCRIPT_URL}"
      exit 1
    fi
    dos2unix --quiet "./sync.sh"
    chmod +x "./sync.sh"
    PARALLEL_LIMIT="$(($(nproc)+1))" bash "./sync.sh"
    wait ; echo
  continue-on-error: true
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
#!/usr/bin/env bash
2+
## <DO NOT RUN STANDALONE, meant for CI Only>
3+
## Meant to merge sni-ip-ranges ==> GHCR
4+
## Self: https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_sni-ip-ranges_ghcr.sh
5+
#-------------------------------------------------------#
6+
7+
#-------------------------------------------------------#
8+
##Sanity
# Abort early unless the required tools and the registry credential are
# available, then log in to ghcr.io so the later `oras` calls can push.
if ! command -v anew-rs &> /dev/null; then
  echo -e "[-] Failed to find anew-rs\n"
  exit 1
fi
#GHCR_TOKEN must be set, non-empty and must not end in whitespace
if [ -z "${GHCR_TOKEN+x}" ] || [ -z "${GHCR_TOKEN##*[[:space:]]}" ]; then
  echo -e "\n[-] FATAL: Failed to Find GHCR_TOKEN (\${GHCR_TOKEN})\n"
  exit 1
fi
if ! command -v oras &> /dev/null; then
  echo -e "[-] Failed to find oras\n"
  exit 1
fi
#Feed the token on stdin so it never shows up in the process list / argv
if ! printf '%s' "${GHCR_TOKEN}" | oras login --username "Azathothas" --password-stdin "ghcr.io"; then
  #Best-effort: credentials may already be cached, so warn instead of aborting
  echo -e "\n[-] WARN: oras login to ghcr.io failed, pushes may be rejected\n" >&2
fi
23+
##ENV
# Runtime environment shared by the rest of the script:
#   SYSTMP      - the system temp root (parent directory of what mktemp hands out)
#   TMPDIR      - fresh scratch directory; also becomes the base for later mktemp calls
#   USER_AGENT  - browser User-Agent string, fetched only when the caller did not provide one
#   ORAS_LOCAL  - staging directory whose DATA/ subtree is pushed to the registry
export TZ="UTC"
#Inner substitution is quoted so a temp root containing spaces survives dirname
SYSTMP="$(dirname "$(mktemp -u)")" && export SYSTMP="${SYSTMP}"
TMPDIR="$(mktemp -d)" && export TMPDIR="${TMPDIR}" ; echo -e "\n[+] Using TEMP: ${TMPDIR}\n"
#${SYSTMP:?} aborts instead of expanding to "/DATA" if SYSTMP ever ends up empty
rm -rf "${SYSTMP:?}/DATA" 2>/dev/null ; mkdir -p "${SYSTMP}/DATA"
if [[ -z "${USER_AGENT}" ]]; then
  #Bounded so an unreachable host cannot stall the whole run
  USER_AGENT="$(curl -qfsSL --max-time 10 'https://raw.githubusercontent.com/pkgforge/devscripts/refs/heads/main/Misc/User-Agents/ua_firefox_macos_latest.txt')"
fi
##Repo
ORAS_LOCAL="$(mktemp -d)"
GHCRPKG_URL="ghcr.io/pkgforge-security/domains/sni-ip-ranges"
#Constant URL: already free of stray slashes/whitespace, so no clean-up pipeline is needed
PKG_WEBPAGE="https://github.com/pkgforge-security/domains"
export GHCRPKG_URL ORAS_LOCAL PKG_WEBPAGE
36+
#-------------------------------------------------------#
37+
38+
#-------------------------------------------------------#
39+
##Func
#######################################
# Print the upload_date annotation currently stored on
# ${GHCRPKG_URL}:${GHCRPKG_TAG}, with all whitespace stripped.
# Globals: GHCRPKG_URL GHCRPKG_TAG (read)
# Outputs: the annotation value (jq prints "null" when it is absent)
#######################################
ghcr_upload_date()
{
  oras manifest fetch "${GHCRPKG_URL}:${GHCRPKG_TAG}" |\
    jq -r '.annotations["dev.pkgforge-security.domains.upload_date"]' | tr -d '[:space:]'
}
export -f ghcr_upload_date

#######################################
# Push every non-empty regular file found directly inside
# ${ORAS_LOCAL}/DATA/sni-ip-ranges to ${GHCRPKG_URL} under the tags
# ${GHCRPKG_TAG} and "merged". After each push the manifest is fetched back and
# the attempt counts as successful only when its upload_date equals PKG_DATE.
# Globals:
#   read    : ORAS_LOCAL GHCRPKG_URL GHCRPKG_TAG MODTIME PKG_WEBPAGE SRC_URL TMPDIR
#   written : PKG_DATE (generated once, only when unset or blank)
# Returns: 0 once an upload is verified (cwd is then ${TMPDIR}),
#          1 after 10 unverified attempts or if the data directory cannot be entered.
#######################################
ghcr_push_cmd()
{
  local attempt o_f
  local -a ghcr_push oras_files
  for attempt in {1..10}; do
    if [ -z "${PKG_DATE+x}" ] || [ -z "${PKG_DATE##*[[:space:]]}" ]; then
      PKG_DATE="$(date --utc '+%Y-%m-%dT%H:%M:%SZ' | tr -d '[:space:]')"
    fi
    #ghcr_push=(oras push --concurrency "10" --disable-path-validation)
    ghcr_push=(oras push --disable-path-validation)
    ghcr_push+=(--config "/dev/null:application/vnd.oci.empty.v1+json")
    ghcr_push+=(--annotation "com.github.package.type=container")
    ghcr_push+=(--annotation "dev.pkgforge-security.domains.upload_date=${PKG_DATE}")
    ghcr_push+=(--annotation "org.opencontainers.image.authors=https://docs.pkgforge.dev/contact/chat")
    ghcr_push+=(--annotation "org.opencontainers.image.created=${PKG_DATE}")
    ghcr_push+=(--annotation "org.opencontainers.image.description=sni-ip-ranges-data-merged-${MODTIME}")
    ghcr_push+=(--annotation "org.opencontainers.image.documentation=${PKG_WEBPAGE}")
    ghcr_push+=(--annotation "org.opencontainers.image.licenses=blessing")
    ghcr_push+=(--annotation "org.opencontainers.image.ref.name=merged-${MODTIME}")
    ghcr_push+=(--annotation "org.opencontainers.image.revision=merged-${MODTIME}")
    ghcr_push+=(--annotation "org.opencontainers.image.source=${PKG_WEBPAGE}")
    ghcr_push+=(--annotation "org.opencontainers.image.title=sni-ip-ranges-merged-${MODTIME}")
    ghcr_push+=(--annotation "org.opencontainers.image.url=${SRC_URL}")
    ghcr_push+=(--annotation "org.opencontainers.image.vendor=pkgforge-security")
    ghcr_push+=(--annotation "org.opencontainers.image.version=merged-${MODTIME}")
    ghcr_push+=("${GHCRPKG_URL}:${GHCRPKG_TAG},merged")
    #File arguments are relative ("./name"), so the push has to run from inside the data dir
    pushd "${ORAS_LOCAL}/DATA/sni-ip-ranges" &>/dev/null || return 1
    oras_files=() ; mapfile -t oras_files < <(find "." -maxdepth 1 -type f -not -path "*/\.*" -print 2>/dev/null)
    for o_f in "${oras_files[@]}"; do
      [[ -f "${o_f}" && -s "${o_f}" ]] && ghcr_push+=("${o_f}")
    done
    "${ghcr_push[@]}" ; sleep 5
    #Check
    if [[ "$(ghcr_upload_date)" == "${PKG_DATE}" ]]; then
      echo -e "\n[+] Registry --> https://${GHCRPKG_URL}"
      pushd "${TMPDIR}" &>/dev/null ; return 0
    else
      echo -e "\n[-] Failed to Push Artifact to ${GHCRPKG_URL}:${GHCRPKG_TAG} (Retrying ${attempt}/10)\n"
    fi
    #Random 0.5s-4.5s back-off before the next attempt
    sleep "$(shuf -i 500-4500 -n 1)e-3"
  done
  return 1
}
export -f ghcr_push_cmd

#######################################
# Upload ${ORAS_LOCAL}/DATA/sni-ip-ranges to GHCR under a fresh
# "merged-<UTC timestamp>" tag (plus the moving "merged" tag).
# Globals:
#   read    : ORAS_LOCAL GHCRPKG_URL TMPDIR (and everything ghcr_push_cmd reads)
#   written : GHCRPKG_TAG MODTIME (exported), PKG_DATE (via ghcr_push_cmd)
# Returns: 0 when the upload was verified,
#          1 when the data directory is missing, holds <= 1000 blocks as
#            reported by `du -s`, or both rounds of pushes failed.
#          Callers that ignore the status behave exactly as before.
#######################################
sync_to_ghcr()
{
  local data_dir="${ORAS_LOCAL}/DATA/sni-ip-ranges"
  local data_size=""
  [[ -d "${data_dir}" ]] && data_size="$(du -s "${data_dir}" | cut -f1 | tr -cd '0-9')"
  if [[ "${data_size:-0}" -le 1000 ]]; then
    echo -e "\n[-] Skipping Upload, not enough data in ${data_dir}\n" >&2
    return 1
  fi
  pushd "${ORAS_LOCAL}" &>/dev/null || return 1
  unset GHCRPKG_TAG MODTIME_TEMP
  MODTIME="$(date --utc '+%Y-%m-%d_T%H-%M-%S' | tr -d '[:space:]')"
  GHCRPKG_TAG="$(echo "merged-${MODTIME}" | sed 's/[^a-zA-Z0-9._-]/_/g; s/_*$//')"
  export GHCRPKG_TAG MODTIME
  #Check Tag: when no dated manifest exists yet, push an empty placeholder first
  if ! oras manifest fetch "${GHCRPKG_URL}:${GHCRPKG_TAG}" |\
    jq -r '.annotations["org.opencontainers.image.created"]' | tr -d '[:space:]' |\
    grep -qiE '[0-9]{4}-[0-9]{2}-[0-9]{2}'; then
    oras push --debug --config "/dev/null:application/vnd.oci.empty.v1+json" "${GHCRPKG_URL}:${GHCRPKG_TAG}"
    sleep 2
  fi
  #First Set of tries
  if ! ghcr_push_cmd; then
    echo -e "\n[✗] Failed to Push Artifact to ${GHCRPKG_URL}:${GHCRPKG_TAG}\n"
    #Second set of Tries
    echo -e "\n[-] Retrying ...\n"
    if ! ghcr_push_cmd; then
      #Dump whatever the registry holds to help debugging, then report the failure
      oras manifest fetch "${GHCRPKG_URL}:${GHCRPKG_TAG}" | jq .
      echo -e "\n[✗] Failed to Push Artifact to ${GHCRPKG_URL}:${GHCRPKG_TAG}\n"
      pushd "${TMPDIR}" &>/dev/null ; return 1
    fi
  fi
  du -sh "${data_dir}" && realpath "${data_dir}"
}
export -f sync_to_ghcr
118+
#-------------------------------------------------------#
119+
120+
#-------------------------------------------------------#
121+
##SRC//DEST
#https://huggingface.co/datasets/pkgforge-security/domains/tree/main/DATA/sni-ip-ranges
# Probe the dataset page, then download one <name>.txt per entry of I_SRCS
# into ${TMPDIR}. Exits 1 when the server does not answer 200 (even after a
# retry with a browser User-Agent) or when any list cannot be downloaded.
# NOTE(review): every curl call here uses -k (TLS verification off) - confirm this is intended.
pushd "${TMPDIR}" &>/dev/null
I_SRCS=(amazon digitalocean google microsoft oracle)
echo -e "\n[+] Data: ${I_SRCS[*]}\n"
if [[ -n "${I_SRCS[*]}" && "${#I_SRCS[@]}" -ge 1 ]]; then
  #Check
  unset I_D I_OK I_OUT SRC_URL_STATUS SRC_URL_TMP
  SRC_URL_TMP="https://huggingface.co/datasets/pkgforge-security/domains/tree/main/DATA/sni-ip-ranges"
  #The last "HTTP/x <code>" header line is the status after all redirects
  SRC_URL_STATUS="$(curl -X "HEAD" -qfksSL "${SRC_URL_TMP}" -I | sed -n 's/^[[:space:]]*HTTP\/[0-9.]*[[:space:]]\+\([0-9]\+\).*/\1/p' | tail -n1 | tr -d '[:space:]')"
  if [[ "${SRC_URL_STATUS}" != "200" ]]; then
    #Retry once with a browser User-Agent before giving up
    SRC_URL_STATUS="$(curl -A "${USER_AGENT}" -X "HEAD" -qfksSL "${SRC_URL_TMP}" -I | sed -n 's/^[[:space:]]*HTTP\/[0-9.]*[[:space:]]\+\([0-9]\+\).*/\1/p' | tail -n1 | tr -d '[:space:]')"
  fi
  if [[ "${SRC_URL_STATUS}" != "200" ]]; then
    echo -e "\n[-] FATAL: Server seems to be Offline\n"
    curl -A "${USER_AGENT}" -w "(SERVER) <== %{url}\n" -X "HEAD" -qfksSL "${SRC_URL_TMP}" -I ; echo -e "\n"
    exit 1
  fi
  SRC_URL="https://huggingface.co/datasets/pkgforge-security/domains/resolve/main/DATA/sni-ip-ranges"
  echo -e "\n[+] Server ==> ${SRC_URL_TMP}"
  #Download
  for I_D in "${I_SRCS[@]}"; do
    echo -e "\n[+] Processing ${I_D}"
    I_OUT="${TMPDIR}/${I_D}.txt"
    I_OK="NO"
    #Get: a list only counts when it is larger than 1000 bytes
    for i in {1..2}; do
      curl -A "${USER_AGENT}" -w "(DL) <== %{url}\n" -kfSL "${SRC_URL}/${I_D}.txt" --retry 3 --retry-all-errors -o "${I_OUT}"
      if [[ -s "${I_OUT}" && $(stat -c%s "${I_OUT}") -gt 1000 ]]; then
        du -sh "${I_OUT}"
        I_OK="YES" ; break
      else
        echo "Retrying... ${i}/2"
        sleep 2
      fi
    done
    #Stop here so a truncated or missing list can never reach the merge stage
    if [[ "${I_OK}" != "YES" ]]; then
      echo -e "\n[-] FATAL: Failed to Download ${SRC_URL}/${I_D}.txt\n"
      exit 1
    fi
  done
else
  echo -e "\n[-] FATAL: Failed to Set Sources\n"
  echo -e "[+] Sources : ${I_SRCS[*]}"
  exit 1
fi
163+
#-------------------------------------------------------#
164+
165+
#-------------------------------------------------------#
166+
#Main
# Merge the downloaded provider lists into all.txt, derive a cleaned list of
# domain names (domains.txt) from it and upload the result with sync_to_ghcr.
# Exits 1 when an input list is missing/empty, when all.txt is not larger than
# 100000 bytes, when domains.txt does not exceed 100000 lines, or when the
# upload reports a failure.
pushd "${TMPDIR}" &>/dev/null
unset I_F I_FILES I_ALL I_DOMAINS I_COUNT
I_FILES=("${TMPDIR}/amazon.txt" "${TMPDIR}/digitalocean.txt" "${TMPDIR}/google.txt" "${TMPDIR}/microsoft.txt" "${TMPDIR}/oracle.txt")
#${ORAS_LOCAL:?} aborts instead of touching /DATA if ORAS_LOCAL is ever empty
I_ALL="${ORAS_LOCAL:?}/DATA/sni-ip-ranges/all.txt"
I_DOMAINS="${ORAS_LOCAL}/DATA/sni-ip-ranges/domains.txt"
#Check & Merge
rm -rf "${ORAS_LOCAL:?}/DATA/sni-ip-ranges" 2>/dev/null
mkdir -p "${ORAS_LOCAL}/DATA/sni-ip-ranges"
for I_F in "${I_FILES[@]}"; do
  if [[ -f "${I_F}" ]] && [[ -s "${I_F}" ]]; then
    du -sh "${I_F}"
    echo -e "[+] Appending ${I_F} ==> ${I_ALL}"
    #anew-rs appends only the lines that all.txt does not contain yet
    anew-rs -q "${I_ALL}" < "${I_F}"
    du -sh "${I_ALL}"
    sort --version-sort --unique "${I_ALL}" --output "${I_ALL}"
  else
    echo -e "\n[-] FATAL: Failed to Find ${I_F}"
    exit 1
  fi
done
#Filter Domains
if [[ -s "${I_ALL}" && $(stat -c%s "${I_ALL}") -gt 100000 ]]; then
  echo -e "[+] Cleaning up & Merging ${I_ALL} ==> ${I_DOMAINS}"
  #awk keeps the text between "-- [" and "]" of each line; tr puts one name per line.
  #The sed stages lowercase, strip wildcard/dot prefixes, quotes and brackets, and
  #drop anything without a dot or with characters outside [alnum . _ -].
  awk -F '[[:space:]]*--[[:space:]]*\\[|\\]' '{print $2}' "${I_ALL}" | tr -s '[:space:]' '\n' |\
    sed -E '/^[[:space:]]*$/d; s/^[[:space:]]*\*\.?[[:space:]]*//; s/[A-Z]/\L&/g' |\
    sed -E '/([0-9].*){20}/d; s/^[[:space:]]*//; s/[[:space:]]*$//; s/[${}%]//g' | sed 's/[()]//g' |\
    sed "s/'//g" | sed 's/"//g' | sed 's/^\.\(.*\)/\1/' | sed 's/^\*//' | sed 's/^\.\(.*\)/\1/' |\
    sed 's/^\*//' | sed 's/^\.\(.*\)/\1/' | sed 's/^\*//' | sed '/\./!d' | sed 's/^\.\(.*\)/\1/' |\
    sed '/[[:cntrl:]]/d' | sed '/!/d' | sed '/[^[:alnum:][:space:]._-]/d' | sed '/\*/d' |\
    sed '/^[^[:alnum:]]/d' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | sort -u -o "${I_DOMAINS}"
  sort --version-sort --unique "${I_DOMAINS}" --output "${I_DOMAINS}"
  du -sh "${I_DOMAINS}"
  I_COUNT=""
  [[ -s "${I_DOMAINS}" ]] && I_COUNT="$(wc -l < "${I_DOMAINS}" | tr -cd '0-9')"
  if [[ "${I_COUNT:-0}" -gt 100000 ]]; then
    echo "[+] Domains: ${I_COUNT}"
    #Upload: surface a failed push through the exit status instead of finishing "green"
    if ! sync_to_ghcr; then
      echo -e "\n[X] FATAL: Failed to upload Domains\n"
      exit 1
    fi
    #Break
    pushd "${TMPDIR}" &>/dev/null
  else
    echo -e "\n[X] FATAL: Failed to generate Domains\n"
    wc -l < "${I_DOMAINS}"
    exit 1
  fi
else
  echo -e "\n[X] FATAL: Failed to merge Data\n"
  du -sh "${I_ALL}"
  exit 1
fi
215+
#-------------------------------------------------------#

0 commit comments

Comments
 (0)