#!/usr/bin/env bash
## <DO NOT RUN STANDALONE, meant for CI Only>
## Meant to merge sni-ip-ranges ==> GHCR
## Self: https://raw.githubusercontent.com/pkgforge-security/domains/refs/heads/main/scripts/merge_sni-ip-ranges_ghcr.sh
##
## Required ENV : GHCR_TOKEN (used to log in to ghcr.io)
## Optional ENV : USER_AGENT (downloaded from pkgforge/devscripts when empty)
## Required CMDs: anew-rs, curl, jq, oras
#-------------------------------------------------------#

#-------------------------------------------------------#
##Sanity
# Every external tool the script calls later must exist before any work starts.
for REQ_CMD in anew-rs curl jq oras; do
  if ! command -v "${REQ_CMD}" &>/dev/null; then
    echo -e "[-] Failed to find ${REQ_CMD}\n" >&2
    exit 1
  fi
done
unset REQ_CMD
# Reject an unset/empty token, or one whose text after the last whitespace is empty.
if [ -z "${GHCR_TOKEN+x}" ] || [ -z "${GHCR_TOKEN##*[[:space:]]}" ]; then
  echo -e "\n[-] FATAL: Failed to Find GHCR_TOKEN (\${GHCR_TOKEN})\n" >&2
  exit 1
fi
# Feed the token on stdin so it never shows up in the process list (argv).
if ! printf '%s' "${GHCR_TOKEN}" |
  oras login --username "Azathothas" --password-stdin "ghcr.io"; then
  echo -e "\n[-] FATAL: Failed to login to ghcr.io\n" >&2
  exit 1
fi
##ENV
export TZ="UTC"
# SYSTMP is the directory in which mktemp creates its entries.
SYSTMP="$(dirname "$(mktemp -u)")" && export SYSTMP
if ! TMPDIR="$(mktemp -d)" || [[ ! -d "${TMPDIR}" ]]; then
  echo -e "\n[-] FATAL: Failed to create a temporary directory\n" >&2
  exit 1
fi
export TMPDIR
echo -e "\n[+] Using TEMP: ${TMPDIR}\n"
# Start from an empty ${SYSTMP}/DATA; ':?' aborts instead of expanding to '/DATA'.
rm -rf "${SYSTMP:?}/DATA" 2>/dev/null
mkdir -p "${SYSTMP}/DATA"
if [[ -z "${USER_AGENT}" ]]; then
  USER_AGENT="$(curl -qfsSL 'https://raw.githubusercontent.com/pkgforge/devscripts/refs/heads/main/Misc/User-Agents/ua_firefox_macos_latest.txt')"
fi
##Repo
# TMPDIR is already exported, so this directory is created inside it.
if ! ORAS_LOCAL="$(mktemp -d)" || [[ ! -d "${ORAS_LOCAL}" ]]; then
  echo -e "\n[-] FATAL: Failed to create the ORAS staging directory\n" >&2
  exit 1
fi
GHCRPKG_URL="ghcr.io/pkgforge-security/domains/sni-ip-ranges"
PKG_WEBPAGE="https://github.com/pkgforge-security/domains"
export GHCRPKG_URL ORAS_LOCAL PKG_WEBPAGE
#-------------------------------------------------------#

#-------------------------------------------------------#
##Func
#######################################
# Print one annotation of the manifest at ${GHCRPKG_URL}:${GHCRPKG_TAG}.
# Globals:   GHCRPKG_URL, GHCRPKG_TAG (read)
# Arguments: $1 - annotation key
# Outputs:   the value with all whitespace removed; empty when the fetch
#            yields nothing for jq to parse
#######################################
_ghcr_annotation() {
  oras manifest fetch "${GHCRPKG_URL}:${GHCRPKG_TAG}" |
    jq -r --arg key "${1}" '.annotations[$key]' |
    tr -d '[:space:]'
}

#######################################
# Push every non-empty, non-hidden regular file found directly inside
# ${ORAS_LOCAL}/DATA/sni-ip-ranges to ${GHCRPKG_URL} under the tags
# ${GHCRPKG_TAG} and "merged", retrying up to 10 times.
# An attempt counts as successful only when the registry reports back the
# upload_date annotation that was just pushed.
# Globals:   ORAS_LOCAL, GHCRPKG_URL, GHCRPKG_TAG, MODTIME, PKG_WEBPAGE,
#            SRC_URL (read); PKG_DATE (read, set when unset/empty)
# Returns:   0 once a push is verified, 1 when there is nothing to push or
#            all attempts failed
#######################################
ghcr_push_cmd() {
  local attempt o_f
  local -a ghcr_push=() oras_files=()
  local data_dir="${ORAS_LOCAL}/DATA/sni-ip-ranges"

  if [ -z "${PKG_DATE+x}" ] || [ -z "${PKG_DATE##*[[:space:]]}" ]; then
    PKG_DATE="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
  fi

  #ghcr_push=(oras push --concurrency "10" --disable-path-validation)
  ghcr_push=(oras push --disable-path-validation)
  ghcr_push+=(--config "/dev/null:application/vnd.oci.empty.v1+json")
  ghcr_push+=(--annotation "com.github.package.type=container")
  ghcr_push+=(--annotation "dev.pkgforge-security.domains.upload_date=${PKG_DATE}")
  ghcr_push+=(--annotation "org.opencontainers.image.authors=https://docs.pkgforge.dev/contact/chat")
  ghcr_push+=(--annotation "org.opencontainers.image.created=${PKG_DATE}")
  ghcr_push+=(--annotation "org.opencontainers.image.description=sni-ip-ranges-data-merged-${MODTIME}")
  ghcr_push+=(--annotation "org.opencontainers.image.documentation=${PKG_WEBPAGE}")
  ghcr_push+=(--annotation "org.opencontainers.image.licenses=blessing")
  ghcr_push+=(--annotation "org.opencontainers.image.ref.name=merged-${MODTIME}")
  ghcr_push+=(--annotation "org.opencontainers.image.revision=merged-${MODTIME}")
  ghcr_push+=(--annotation "org.opencontainers.image.source=${PKG_WEBPAGE}")
  ghcr_push+=(--annotation "org.opencontainers.image.title=sni-ip-ranges-merged-${MODTIME}")
  ghcr_push+=(--annotation "org.opencontainers.image.url=${SRC_URL}")
  ghcr_push+=(--annotation "org.opencontainers.image.vendor=pkgforge-security")
  ghcr_push+=(--annotation "org.opencontainers.image.version=merged-${MODTIME}")
  ghcr_push+=("${GHCRPKG_URL}:${GHCRPKG_TAG},merged")

  # Files are listed relative to the data dir ("./name") and pushed from
  # inside it, so the artifact never contains the local staging path.
  mapfile -t oras_files < <(
    cd "${data_dir}" 2>/dev/null &&
      find "." -maxdepth 1 -type f -not -path "*/\.*" -print 2>/dev/null
  )
  local file_count=0
  for o_f in "${oras_files[@]}"; do
    if [[ -f "${data_dir}/${o_f}" && -s "${data_dir}/${o_f}" ]]; then
      ghcr_push+=("${o_f}")
      file_count=$((file_count + 1))
    fi
  done
  if ((file_count == 0)); then
    echo -e "\n[-] No non-empty files to push in ${data_dir}\n" >&2
    return 1
  fi

  for attempt in {1..10}; do
    # Subshell: the caller's working directory is left untouched.
    (cd "${data_dir}" && "${ghcr_push[@]}")
    sleep 5
    #Check
    if [[ "$(_ghcr_annotation 'dev.pkgforge-security.domains.upload_date')" == "${PKG_DATE}" ]]; then
      echo -e "\n[+] Registry --> https://${GHCRPKG_URL}"
      return 0
    fi
    echo -e "\n[-] Failed to Push Artifact to ${GHCRPKG_URL}:${GHCRPKG_TAG} (Retrying ${attempt}/10)\n"
    # Random 0.5s - 4.5s pause before the next attempt
    sleep "$(shuf -i 500-4500 -n 1)e-3"
  done
  return 1
}

#######################################
# Upload ${ORAS_LOCAL}/DATA/sni-ip-ranges to GHCR under a fresh
# "merged-<UTC timestamp>" tag (plus the "merged" tag).
# Nothing is uploaded unless `du -s` reports more than 1000 for the directory.
# Globals:   ORAS_LOCAL, GHCRPKG_URL, TMPDIR (read);
#            GHCRPKG_TAG, MODTIME (set and exported)
# Returns:   0 when the upload was verified; 1 when the data directory is
#            missing/too small or both rounds of push attempts failed
# Side effect: after a push round the working directory is ${TMPDIR}
#######################################
sync_to_ghcr() {
  local data_dir="${ORAS_LOCAL}/DATA/sni-ip-ranges"
  local data_size

  if [[ ! -d "${data_dir}" ]]; then
    echo -e "\n[-] FATAL: ${data_dir} does not exist, nothing to upload\n" >&2
    return 1
  fi
  data_size="$(du -s "${data_dir}" | cut -f1 | tr -cd '0-9')"
  if [[ "${data_size:-0}" -le 1000 ]]; then
    echo -e "\n[-] FATAL: ${data_dir} is too small to upload (du -s: ${data_size:-0})\n" >&2
    return 1
  fi

  unset GHCRPKG_TAG MODTIME_TEMP
  MODTIME="$(date -u '+%Y-%m-%d_T%H-%M-%S')"
  GHCRPKG_TAG="merged-${MODTIME}"
  export GHCRPKG_TAG MODTIME

  #Check Tag
  # When the tag has no manifest carrying a creation date yet, push an
  # empty artifact under that tag first.
  if ! _ghcr_annotation 'org.opencontainers.image.created' |
    grep -qiE '[0-9]{4}-[0-9]{2}-[0-9]{2}'; then
    oras push --debug --config "/dev/null:application/vnd.oci.empty.v1+json" "${GHCRPKG_URL}:${GHCRPKG_TAG}"
    sleep 2
  fi

  #First Set of tries
  if ! ghcr_push_cmd; then
    echo -e "\n[✗] Failed to Push Artifact to ${GHCRPKG_URL}:${GHCRPKG_TAG}\n" >&2
    #Second set of Tries
    echo -e "\n[-] Retrying ...\n"
    if ! ghcr_push_cmd; then
      # Dump whatever the registry currently holds to help debugging
      oras manifest fetch "${GHCRPKG_URL}:${GHCRPKG_TAG}" | jq .
      echo -e "\n[✗] Failed to Push Artifact to ${GHCRPKG_URL}:${GHCRPKG_TAG}\n" >&2
      if [[ -d "${TMPDIR:-}" ]]; then cd "${TMPDIR}" || true; fi
      return 1
    fi
  fi

  du -sh "${data_dir}" && realpath "${data_dir}"
  if [[ -d "${TMPDIR:-}" ]]; then cd "${TMPDIR}" || true; fi
  return 0
}
export -f _ghcr_annotation ghcr_push_cmd sync_to_ghcr
#-------------------------------------------------------#

#-------------------------------------------------------#
##SRC//DEST
#https://huggingface.co/datasets/pkgforge-security/domains/tree/main/DATA/sni-ip-ranges

#######################################
# Send a HEAD request and print the HTTP status code of the last response.
# Arguments: $1 - URL; any further arguments are handed to curl verbatim
# Outputs:   the status code of the last status line received (redirects
#            are followed); empty when no status line was received
#######################################
_sni_http_status() {
  local url="${1}"
  shift
  # NOTE(review): -k turns off TLS certificate verification; kept as in the
  # original invocation - confirm the CI image really needs it.
  curl "$@" -qfksSL "${url}" -I |
    sed -n 's/^[[:space:]]*HTTP\/[0-9.]*[[:space:]]\+\([0-9]\+\).*/\1/p' |
    tail -n1 | tr -d '[:space:]'
}

cd "${TMPDIR}" || {
  echo -e "\n[-] FATAL: Failed to enter ${TMPDIR}\n" >&2
  exit 1
}
I_SRCS=(amazon digitalocean google microsoft oracle)
echo -e "\n[+] Data: ${I_SRCS[*]}\n"
if [[ -z "${I_SRCS[*]}" || "${#I_SRCS[@]}" -lt 1 ]]; then
  echo -e "\n[-] FATAL: Failed to Set Sources\n" >&2
  echo -e "[+] Sources : ${I_SRCS[*]}" >&2
  exit 1
fi

#Check
unset I_D SRC_URL_STATUS SRC_URL_TMP
SRC_URL_TMP="https://huggingface.co/datasets/pkgforge-security/domains/tree/main/DATA/sni-ip-ranges"
SRC_URL_STATUS="$(_sni_http_status "${SRC_URL_TMP}")"
if [[ "${SRC_URL_STATUS}" != "200" ]]; then
  # Second chance with a browser User-Agent; its result decides the outcome.
  SRC_URL_STATUS="$(_sni_http_status "${SRC_URL_TMP}" -A "${USER_AGENT}")"
fi
if [[ "${SRC_URL_STATUS}" != "200" ]]; then
  echo -e "\n[-] FATAL: Server seems to be Offline\n" >&2
  # Show the raw response headers for the CI log
  curl -A "${USER_AGENT}" -w "(SERVER) <== %{url}\n" -qfksSL "${SRC_URL_TMP}" -I
  echo -e "\n"
  exit 1
fi
SRC_URL="https://huggingface.co/datasets/pkgforge-security/domains/resolve/main/DATA/sni-ip-ranges"
echo -e "\n[+] Server ==> ${SRC_URL_TMP}"

#Download
for I_D in "${I_SRCS[@]}"; do
  echo -e "\n[+] Processing ${I_D}"
  DL_OK=0
  for DL_TRY in {1..2}; do
    curl -A "${USER_AGENT}" -w "(DL) <== %{url}\n" -kfSL "${SRC_URL}/${I_D}.txt" --retry 3 --retry-all-errors -o "${TMPDIR}/${I_D}.txt"
    # Anything of 1000 bytes or less is treated as a failed download
    if [[ -s "${TMPDIR}/${I_D}.txt" && $(stat -c%s "${TMPDIR}/${I_D}.txt") -gt 1000 ]]; then
      du -sh "${TMPDIR}/${I_D}.txt"
      DL_OK=1
      break
    fi
    echo "Retrying... ${DL_TRY}/2"
    sleep 2
  done
  # Stop here instead of merging an incomplete set of sources
  if [[ "${DL_OK}" -ne 1 ]]; then
    echo -e "\n[-] FATAL: Failed to download ${SRC_URL}/${I_D}.txt\n" >&2
    exit 1
  fi
done
unset DL_OK DL_TRY
#-------------------------------------------------------#

#-------------------------------------------------------#
#Main
#######################################
# Reduce merged scan lines (stdin) to cleaned host names (stdout, one per line).
# Each line is split on "-- [" and "]"; the second field (the text between
# the brackets) is broken into one token per line, then every token is:
#   - stripped of a leading "*." / "*" and lowercased
#   - dropped when it contains 20 or more digits
#   - stripped of $ { } % ( ) " ' characters and of up to three leading
#     "." / "*" pairs
#   - dropped when it has no dot, contains control characters, "!", "*",
#     or any character outside [:alnum:] [:space:] . _ -, or does not start
#     with an alphanumeric character
# The sed commands run in exactly this order; "\L" requires GNU sed.
#######################################
_sni_extract_domains() {
  awk -F '[[:space:]]*--[[:space:]]*\\[|\\]' '{print $2}' |
    tr -s '[:space:]' '\n' |
    sed -E \
      -e '/^[[:space:]]*$/d' \
      -e 's/^[[:space:]]*\*\.?[[:space:]]*//' \
      -e 's/[A-Z]/\L&/g' \
      -e '/([0-9].*){20}/d' \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//' \
      -e 's/[${}%()"]//g' \
      -e "s/'//g" \
      -e 's/^\.//' -e 's/^\*//' \
      -e 's/^\.//' -e 's/^\*//' \
      -e 's/^\.//' -e 's/^\*//' \
      -e '/\./!d' \
      -e 's/^\.//' \
      -e '/[[:cntrl:]]/d' \
      -e '/!/d' \
      -e '/[^[:alnum:][:space:]._-]/d' \
      -e '/\*/d' \
      -e '/^[^[:alnum:]]/d' \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//'
}

cd "${TMPDIR}" || {
  echo -e "\n[-] FATAL: Failed to enter ${TMPDIR}" >&2
  exit 1
}
unset I_F I_FILES
# One input file per entry of I_SRCS (set by the download section above)
I_FILES=()
for I_F in "${I_SRCS[@]}"; do
  I_FILES+=("${TMPDIR}/${I_F}.txt")
done
SNI_ALL="${ORAS_LOCAL}/DATA/sni-ip-ranges/all.txt"
SNI_DOMAINS="${ORAS_LOCAL}/DATA/sni-ip-ranges/domains.txt"

#Check & Merge
rm -rf "${ORAS_LOCAL:?}/DATA/sni-ip-ranges" 2>/dev/null
mkdir -p "${ORAS_LOCAL}/DATA/sni-ip-ranges"
for I_F in "${I_FILES[@]}"; do
  if [[ ! -f "${I_F}" || ! -s "${I_F}" ]]; then
    echo -e "\n[-] FATAL: Failed to Find ${I_F}" >&2
    exit 1
  fi
  du -sh "${I_F}"
  echo -e "[+] Appending ${I_F} ==> ${SNI_ALL}"
  anew-rs -q "${SNI_ALL}" <"${I_F}"
  du -sh "${SNI_ALL}"
  sort --version-sort --unique "${SNI_ALL}" --output "${SNI_ALL}"
done

#Filter Domains
# A merged file of 100000 bytes or less is treated as a failed merge
if [[ ! -s "${SNI_ALL}" || $(stat -c%s "${SNI_ALL}") -le 100000 ]]; then
  echo -e "\n[X] FATAL: Failed to merge Data\n" >&2
  du -sh "${SNI_ALL}"
  exit 1
fi
echo -e "[+] Cleaning up & Merging ${SNI_ALL} ==> ${SNI_DOMAINS}"
_sni_extract_domains <"${SNI_ALL}" |
  sort -u |
  sort --version-sort --unique --output "${SNI_DOMAINS}"
du -sh "${SNI_DOMAINS}"

# Fewer than 100001 lines is treated as a failed extraction
SNI_DOMAINS_COUNT="$(wc -l <"${SNI_DOMAINS}" | tr -cd '0-9')"
if [[ ! -s "${SNI_DOMAINS}" || "${SNI_DOMAINS_COUNT:-0}" -le 100000 ]]; then
  echo -e "\n[X] FATAL: Failed to generate Domains\n" >&2
  wc -l <"${SNI_DOMAINS}"
  exit 1
fi
echo "[+] Domains: ${SNI_DOMAINS_COUNT}"

#Upload
# sync_to_ghcr is defined earlier in this file; a non-zero status fails the run
if ! sync_to_ghcr; then
  echo -e "\n[X] FATAL: Failed to upload to GHCR\n" >&2
  exit 1
fi
cd "${TMPDIR}" || exit 1
#-------------------------------------------------------#
# EOF