Skip to content

Commit c16eb36

Browse files
Build top huggingface models
Signed-off-by: Avinash Singh <[email protected]>
1 parent 9f83166 commit c16eb36

File tree

4 files changed

+728
-0
lines changed

4 files changed

+728
-0
lines changed
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
---
# Workflow that selects popular Hugging Face models and builds them with
# modctl. It runs on a weekly schedule and can also be started manually with
# custom selection parameters.
name: Build Top HuggingFace Models

on:
  schedule:
    # Every Sunday at 00:00 UTC.
    - cron: '0 0 * * 0'
  workflow_dispatch:
    inputs:
      # How many models the selection script should return.
      limit:
        description: 'Number of models to build'
        required: false
        default: '10'
        type: string
      # Upper bound on model size, in GB.
      max_size:
        description: 'Maximum model size in GB'
        required: false
        default: '10'
        type: string
      # Ranking used when picking the top models.
      sort_by:
        description: 'Sort criteria'
        required: false
        default: 'downloads'
        type: choice
        options:
          - downloads
          - likes
          - trending

# The token may read the repository and write packages.
permissions:
  contents: read
  packages: write

# Shared by every job: target registry host and the owning namespace.
env:
  REGISTRY: ghcr.io
  ORGANIZATION: ${{ github.repository_owner }}

jobs:
  # Runs contrib/scripts/select-top-models.py and exposes the resulting model
  # list as the compact JSON job output `models`.
  select-models:
    name: Select Top Models
    runs-on: ubuntu-latest
    outputs:
      models: ${{ steps.select-models.outputs.models }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.14"

      - name: Install dependencies
        run: |
          pip install -r contrib/scripts/requirements.txt

      - name: Select compatible models
        id: select-models
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Inputs are handed over through the environment instead of being
          # expanded into the script text, so a crafted value cannot inject
          # shell commands. Scheduled runs carry no inputs, hence the
          # fallbacks.
          LIMIT: ${{ github.event.inputs.limit || '10' }}
          MAX_SIZE: ${{ github.event.inputs.max_size || '10' }}
          SORT_BY: ${{ github.event.inputs.sort_by || 'downloads' }}
        run: |
          python contrib/scripts/select-top-models.py \
            --limit "$LIMIT" \
            --max-size "$MAX_SIZE" \
            --sort-by "$SORT_BY" \
            --output models.json

          # Convert to single-line JSON for the step output. Assigning to a
          # variable first (rather than nesting the command inside echo)
          # makes a jq failure abort the step instead of silently writing an
          # empty value.
          models_json="$(jq -c . models.json)"
          echo "models=${models_json}" >> "$GITHUB_OUTPUT"

          # Also display selected models for debugging
          echo "Selected models:"
          jq -r '.[] | "\(.id) (\(.format), \(.size_gb)GB)"' models.json

# Job: build libgit2 v1.5.1 from source, compile modctl against it and
# publish the binary as the `modctl-artifact` artifact.
  build-modctl:
    name: Build modctl
    runs-on: ubuntu-latest
    env:
      # Directory holding downloaded source tarballs; persisted by the cache.
      PACKAGE_DIR: modctl-build-package
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Go
        uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache-dependency-path: go.sum

      - name: Create Cache Dir
        run: |
          mkdir -p "$PACKAGE_DIR"

      - name: Cache Package
        id: cache-package
        uses: actions/cache@v4
        with:
          path: ${{ env.PACKAGE_DIR }}
          key: modctl-build-packages

      - name: Install dependencies
        env:
          LIBGIT2_SYS_USE_PKG_CONFIG: "1"
        run: |
          sudo apt-get update
          sudo apt-get install -y pkg-config
          sudo DEBIAN_FRONTEND=noninteractive apt install -y build-essential \
            cmake pkg-config libssl-dev libssh2-1-dev zlib1g-dev \
            libhttp-parser-dev python3 wget tar git

          # Fetch the libgit2 source only when the cache did not provide it.
          mkdir -p "$PACKAGE_DIR"
          libgit2_tarball="$PACKAGE_DIR/libgit2-v1.5.1.tar.gz"
          if [ ! -f "$libgit2_tarball" ]; then
            wget https://github.com/libgit2/libgit2/archive/refs/tags/v1.5.1.tar.gz -O "$libgit2_tarball"
          fi

          # Static release build, installed system-wide.
          tar -xzf "$libgit2_tarball"
          cd libgit2-1.5.1 && mkdir build && cd build
          cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF
          make -j$(nproc)
          sudo make install
          sudo ldconfig

      - name: Build modctl
        run: |
          go build -tags "static system_libgit2 enable_libgit2"

      - name: Upload modctl
        uses: actions/upload-artifact@v4
        with:
          name: modctl-artifact
          path: modctl

# Job: for each selected model, download it from Hugging Face, generate a
# Modelfile, build and push the artifact, then pull it back as a check.
  build-and-push-models:
    name: Build ${{ matrix.model.id }}
    needs: [select-models, build-modctl]
    # An empty selection would leave the matrix below without values and fail
    # the run, so skip the job in that case. Assumes the selection script
    # writes `[]` when nothing matches -- TODO confirm.
    if: ${{ needs.select-models.outputs.models != '' && needs.select-models.outputs.models != '[]' }}
    runs-on: ubuntu-latest
    timeout-minutes: 120
    strategy:
      fail-fast: false
      max-parallel: 3  # Don't overwhelm GHCR
      matrix:
        model: ${{ fromJson(needs.select-models.outputs.models) }}
    env:
      # Model metadata comes from the selection job's JSON. Scripts below read
      # these as quoted shell variables and never expand them into script
      # text, so unexpected characters cannot inject commands.
      MODEL_ID: ${{ matrix.model.id }}
      MODEL_FAMILY: ${{ matrix.model.family }}
      MODEL_FORMAT: ${{ matrix.model.format }}
      MODEL_PARAM_SIZE: ${{ matrix.model.param_size }}
      MODEL_DIR: model-files
    steps:
      - name: Download modctl artifact
        uses: actions/download-artifact@v4
        with:
          name: modctl-artifact
          path: modctl

      - name: Setup modctl
        env:
          REGISTRY_USER: ${{ github.actor }}
          REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
        run: |
          sudo cp modctl/modctl /bin/modctl
          sudo chmod +x /bin/modctl
          modctl version
          modctl login -u "$REGISTRY_USER" \
            -p "$REGISTRY_PASSWORD" \
            "$REGISTRY"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Cache model
        uses: actions/cache@v4
        id: cache-model
        with:
          path: ${{ env.MODEL_DIR }}
          # Keyed by model ID only: this job never checks out the repository
          # and the model is not on disk yet when the key is evaluated, so a
          # hashFiles('**/config.json') component would always be empty.
          # NOTE(review): the cleanup step below removes MODEL_DIR before the
          # cache post-step runs, so presumably nothing is ever saved --
          # confirm whether model caching is wanted at all.
          key: model-${{ env.MODEL_ID }}

      - name: Download HuggingFace Model
        if: steps.cache-model.outputs.cache-hit != 'true'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pip install 'huggingface_hub'
          python << 'EOF'
          from huggingface_hub import snapshot_download
          import os

          model_id = os.environ['MODEL_ID']
          model_dir = os.environ['MODEL_DIR']

          print(f"Downloading {model_id}...")
          snapshot_download(
              repo_id=model_id,
              local_dir=model_dir,
              token=os.environ.get('HF_TOKEN')
          )
          print(f"Download complete: {model_dir}")
          EOF

      - name: Generate Modelfile
        run: |
          cd "$MODEL_DIR"
          echo "Generating Modelfile for ${MODEL_ID}"
          modctl modelfile generate \
            --arch transformer \
            --family "$MODEL_FAMILY" \
            --format "$MODEL_FORMAT" \
            --param-size "$MODEL_PARAM_SIZE" \
            .

          echo "Generated Modelfile:"
          cat Modelfile

      - name: Compute image reference
        run: |
          # Convert model ID to a valid image name (lowercase, '/' -> '-').
          # The owner is lowercased as well, because registry repository
          # paths must not contain uppercase letters.
          image_name="$(printf '%s' "$MODEL_ID" | tr '[:upper:]' '[:lower:]' | tr '/' '-')"
          image_owner="$(printf '%s' "$ORGANIZATION" | tr '[:upper:]' '[:lower:]')"
          # Exported once so the push and verify steps use the same reference.
          echo "IMAGE_URL=${REGISTRY}/${image_owner}/${image_name}:latest" >> "$GITHUB_ENV"

      - name: Build and Push Model
        run: |
          cd "$MODEL_DIR"

          echo "Building and pushing to ${IMAGE_URL}"

          modctl build -f Modelfile \
            -t "$IMAGE_URL" \
            --raw --output-remote --log-level debug \
            .

          echo "Successfully pushed ${IMAGE_URL}"

      - name: Cleanup HuggingFace Model Files
        run: |
          echo "Cleaning up HuggingFace model files to free disk space..."
          du -sh "$MODEL_DIR" || true
          rm -rf "$MODEL_DIR"
          echo "Cleanup complete"
          df -h

      - name: Verify Pull
        run: |
          mkdir -p verify-download
          echo "Pulling ${IMAGE_URL} to verify"

          modctl pull "$IMAGE_URL" \
            --extract-dir verify-download \
            --log-level debug

          echo "Successfully verified pull from ${IMAGE_URL}"

# Job: append a Markdown recap of the selected models to the run summary.
# Runs even when earlier jobs failed or were skipped.
  summary:
    name: Build Summary
    needs: [select-models, build-and-push-models]
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Generate Summary
        env:
          # Passed through the environment rather than pasted into a quoted
          # shell string, so quotes inside the JSON cannot break or inject
          # into the script.
          MODELS_JSON: ${{ needs.select-models.outputs.models }}
        run: |
          {
            echo "# Build Top Models Summary"
            echo ""
            echo "## Selected Models"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          # Best effort: a missing or malformed list must not fail the job.
          printf '%s\n' "$MODELS_JSON" | jq -r '.[] | "- **\(.id)** (\(.format), \(.param_size), \(.size_gb)GB) - \(.downloads) downloads"' >> "$GITHUB_STEP_SUMMARY" || true

          {
            echo ""
            echo "## Build Status"
            echo "Check individual job results above."
          } >> "$GITHUB_STEP_SUMMARY"

contrib/scripts/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
huggingface_hub>=0.20.0

0 commit comments

Comments
 (0)