-
Notifications
You must be signed in to change notification settings - Fork 492
ollama-utils: add publish and cronjob workflows #1206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6726640
cf84133
430685d
08af444
153c245
4340cd8
cf49489
b24f84d
2f7fec6
8817323
6e2f8c8
ce84830
66d3ecc
36301fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
name: Ollama template update | ||
on: | ||
# push: # for debugging | ||
workflow_dispatch: | ||
schedule: | ||
- cron: '0 7 * * 1' # every monday at 7am, so I'll review it after having a 🥐 | ||
|
||
permissions: | ||
pull-requests: write # for creating PR | ||
issues: write # for adding labels to the created PR | ||
contents: write # for git push new branch | ||
|
||
jobs: | ||
update-ollama-templates: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
if: github.repository == 'huggingface/huggingface.js' | ||
|
||
- name: Prepare | ||
id: prepare | ||
if: github.repository == 'huggingface/huggingface.js' | ||
shell: bash | ||
run: | | ||
git config --global user.name machineuser | ||
git config --global user.email [email protected] | ||
git config --global --add safe.directory "$GITHUB_WORKSPACE" | ||
npm install -g pnpm | ||
CURRENT_DATE=$(date -u +"%Y-%m-%d") | ||
echo "CURRENT_DATE=$CURRENT_DATE" | ||
echo "CURRENT_DATE=$CURRENT_DATE" >> $GITHUB_OUTPUT | ||
|
||
- name: Install dependencies | ||
if: github.repository == 'huggingface/huggingface.js' | ||
shell: bash | ||
run: | | ||
cd packages/ollama-utils | ||
pnpm install --frozen-lockfile | ||
|
||
- name: Run update script | ||
if: github.repository == 'huggingface/huggingface.js' | ||
shell: bash | ||
run: | | ||
cd packages/ollama-utils | ||
pnpm run build:automap | ||
|
||
- name: Check for changed files | ||
id: changes | ||
if: github.repository == 'huggingface/huggingface.js' | ||
shell: bash | ||
env: | ||
CURRENT_DATE: ${{ steps.prepare.outputs.CURRENT_DATE }} | ||
run: | | ||
set -x | ||
|
||
FILE_TO_ADD="packages/ollama-utils/src/chat-template-automap.ts" | ||
|
||
git status | ||
modified_files="$(git status -s)" | ||
echo "Modified files: ${modified_files}" | ||
if [ -n "${modified_files}" ]; then | ||
NEW_BRANCH="ollama-${CURRENT_DATE}" | ||
echo "NEW_BRANCH=${NEW_BRANCH}" | ||
echo "Changes detected, will create a new branch:" | ||
echo "${modified_files}" | ||
git add "${FILE_TO_ADD}" | ||
git commit -m "ollama update ${CURRENT_DATE}" | ||
git checkout -b "${NEW_BRANCH}" | ||
git push -f origin "${NEW_BRANCH}" | ||
echo "HAS_CHANGES=true" >> $GITHUB_OUTPUT | ||
echo "NEW_BRANCH=${NEW_BRANCH}" >> $GITHUB_OUTPUT | ||
else | ||
echo "No files changed, skipping..." | ||
echo "HAS_CHANGES=false" >> $GITHUB_OUTPUT | ||
fi | ||
|
||
- name: Create PR | ||
if: steps.changes.outputs.HAS_CHANGES == 'true' && github.repository == 'huggingface/huggingface.js' | ||
uses: actions/github-script@v6 | ||
env: | ||
CURRENT_DATE: ${{ steps.prepare.outputs.CURRENT_DATE }} | ||
NEW_BRANCH: ${{ steps.changes.outputs.NEW_BRANCH }} | ||
with: | ||
script: | | ||
const { repo, owner } = context.repo; | ||
const currDate = process.env.CURRENT_DATE; | ||
const newBranch = process.env.NEW_BRANCH; | ||
|
||
const result = await github.rest.pulls.create({ | ||
title: '[ollama-utils] 🤖 Auto-update chat templates (' + currDate + ')', | ||
owner, | ||
repo, | ||
head: newBranch, | ||
base: 'main', | ||
body: [ | ||
'This PR is auto-generated by', | ||
'[generate-automap.ts](https://github.com/huggingface/huggingface.js/blob/main/packages/ollama-utils/scripts/generate-automap.ts).' | ||
].join('\n') | ||
}); | ||
|
||
console.log({ result }); | ||
// github.rest.issues.addLabels({ | ||
// owner, | ||
// repo, | ||
// issue_number: result.data.number, | ||
// labels: ['feature', 'automated pr'] | ||
// }); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
name: Ollama Utils - Version and Release | ||
|
||
on: | ||
workflow_dispatch: | ||
inputs: | ||
newversion: | ||
type: choice | ||
description: "Semantic Version Bump Type" | ||
default: patch | ||
options: | ||
- patch | ||
- minor | ||
- major | ||
|
||
concurrency: | ||
group: "push-to-main" | ||
|
||
defaults: | ||
run: | ||
working-directory: packages/ollama-utils | ||
|
||
jobs: | ||
version_and_release: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
with: | ||
# Needed to push the tag and the commit on the main branch, otherwise we get: | ||
# > Run git push --follow-tags | ||
# remote: error: GH006: Protected branch update failed for refs/heads/main. | ||
# remote: error: Changes must be made through a pull request. Required status check "lint" is expected. | ||
token: ${{ secrets.BOT_ACCESS_TOKEN }} | ||
- run: npm install -g corepack@latest && corepack enable | ||
- uses: actions/setup-node@v3 | ||
with: | ||
node-version: "20" | ||
cache: "pnpm" | ||
cache-dependency-path: | | ||
packages/ollama-utils/pnpm-lock.yaml | ||
# setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED | ||
registry-url: "https://registry.npmjs.org" | ||
- run: pnpm install | ||
- run: git config --global user.name machineuser | ||
- run: git config --global user.email [email protected] | ||
- run: | | ||
PACKAGE_VERSION=$(node -p "require('./package.json').version") | ||
BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") | ||
# Update package.json with the new version | ||
node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" | ||
git commit . -m "🔖 @huggingface/ollama-utils $BUMPED_VERSION" | ||
git tag "ollama-utils-v$BUMPED_VERSION" | ||
|
||
- name: "Check Deps are published before publishing this package" | ||
run: pnpm -w check-deps tasks | ||
|
||
- run: pnpm publish --no-git-checks . | ||
env: | ||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} | ||
- run: git pull --rebase && git push --follow-tags | ||
# hack - reuse actions/setup-node@v3 just to set a new registry | ||
- uses: actions/setup-node@v3 | ||
with: | ||
node-version: "20" | ||
registry-url: "https://npm.pkg.github.com" | ||
# Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) | ||
# - run: pnpm publish --no-git-checks . | ||
# env: | ||
# NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -107,6 +107,9 @@ const getSpecialTokens = (tmpl: string): string[] => { | |
|
||
nDoing = 0; | ||
nAll = modelsWithTag.length; | ||
const addedModels: string[] = []; | ||
const skippedModelsDueToErr: string[] = []; | ||
|
||
const workerGetTemplate = async () => { | ||
while (true) { | ||
const modelWithTag = modelsWithTag.shift(); | ||
|
@@ -137,44 +140,52 @@ const getSpecialTokens = (tmpl: string): string[] => { | |
try { | ||
ggufData = await gguf(modelUrl); | ||
} catch (e) { | ||
console.log(" --> [X] FATAL: GGUF error", { model, tag, modelUrl }); | ||
throw e; // rethrow | ||
console.log(` --> [X] Skipping ${modelWithTag} due to error while calling gguf()`, e); | ||
skippedModelsDueToErr.push(modelWithTag); | ||
continue; | ||
} | ||
const { metadata } = ggufData; | ||
const ggufTmpl = metadata["tokenizer.chat_template"]; | ||
if (ggufTmpl) { | ||
if (seenGGUFTemplate.has(ggufTmpl)) { | ||
console.log(" --> Already seen this GGUF template, skip..."); | ||
try { | ||
if (seenGGUFTemplate.has(ggufTmpl)) { | ||
console.log(" --> Already seen this GGUF template, skip..."); | ||
continue; | ||
} | ||
seenGGUFTemplate.add(ggufTmpl); | ||
console.log(" --> GGUF chat template OK"); | ||
const tmplBlob = manifest.layers.find((l) => l.mediaType.match(/\.template/)); | ||
if (!tmplBlob) continue; | ||
const ollamaTmplUrl = getBlobUrl(tmplBlob.digest); | ||
if (!ollamaTmplUrl) { | ||
console.log(" --> [X] No ollama template"); | ||
continue; | ||
} | ||
const ollamaTmpl = await (await fetch(ollamaTmplUrl)).text(); | ||
console.log(" --> All OK"); | ||
const record: OutputItem = { | ||
model: modelWithTag, | ||
gguf: ggufTmpl, | ||
ollama: { | ||
template: ollamaTmpl, | ||
tokens: getSpecialTokens(ggufTmpl), | ||
}, | ||
}; | ||
// get params | ||
const ollamaParamsBlob = manifest.layers.find((l) => l.mediaType.match(/\.params/)); | ||
const ollamaParamsUrl = ollamaParamsBlob ? getBlobUrl(ollamaParamsBlob.digest) : null; | ||
if (ollamaParamsUrl) { | ||
console.log(" --> Got params"); | ||
record.ollama.params = await (await fetch(ollamaParamsUrl)).json(); | ||
} | ||
output.push(record); | ||
addedModels.push(modelWithTag); | ||
if (DEBUG) appendFileSync("ollama_tmp.jsonl", JSON.stringify(record) + "\n"); | ||
} catch (e) { | ||
console.log(` --> [X] Skipping ${modelWithTag} due to error`, e); | ||
skippedModelsDueToErr.push(modelWithTag); | ||
continue; | ||
} | ||
seenGGUFTemplate.add(ggufTmpl); | ||
console.log(" --> GGUF chat template OK"); | ||
const tmplBlob = manifest.layers.find((l) => l.mediaType.match(/\.template/)); | ||
if (!tmplBlob) continue; | ||
const ollamaTmplUrl = getBlobUrl(tmplBlob.digest); | ||
if (!ollamaTmplUrl) { | ||
console.log(" --> [X] No ollama template"); | ||
continue; | ||
} | ||
const ollamaTmpl = await (await fetch(ollamaTmplUrl)).text(); | ||
console.log(" --> All OK"); | ||
const record: OutputItem = { | ||
model: modelWithTag, | ||
gguf: ggufTmpl, | ||
ollama: { | ||
template: ollamaTmpl, | ||
tokens: getSpecialTokens(ggufTmpl), | ||
}, | ||
}; | ||
// get params | ||
const ollamaParamsBlob = manifest.layers.find((l) => l.mediaType.match(/\.params/)); | ||
const ollamaParamsUrl = ollamaParamsBlob ? getBlobUrl(ollamaParamsBlob.digest) : null; | ||
if (ollamaParamsUrl) { | ||
console.log(" --> Got params"); | ||
record.ollama.params = await (await fetch(ollamaParamsUrl)).json(); | ||
} | ||
output.push(record); | ||
if (DEBUG) appendFileSync("ollama_tmp.jsonl", JSON.stringify(record) + "\n"); | ||
} else { | ||
console.log(" --> [X] No GGUF template"); | ||
continue; | ||
|
@@ -190,7 +201,13 @@ const getSpecialTokens = (tmpl: string): string[] => { | |
.map(() => workerGetTemplate()) | ||
); | ||
|
||
console.log("===================================="); | ||
console.log("DONE"); | ||
console.log("Added templates for:"); | ||
console.log(addedModels.join("\n")); | ||
console.log("Skipped these models due to error:"); | ||
console.log(skippedModelsDueToErr.join("\n")); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we use console.error() for better error differentiation? This would help separate errors from regular logs, making debugging and log filtering easier. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure why stderr sometimes comes out of order in the GitHub CI output, so I'm sticking with console.log() |
||
|
||
output.sort((a, b) => a.model.localeCompare(b.model)); | ||
|
||
writeFileSync( | ||
|
@@ -201,6 +218,11 @@ const getSpecialTokens = (tmpl: string): string[] => { | |
|
||
import { OllamaChatTemplateMapEntry } from "./types"; | ||
|
||
/** | ||
* Skipped these models due to error: | ||
${skippedModelsDueToErr.map((m) => ` * - ${m}`).join("\n")} | ||
*/ | ||
|
||
export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = ${JSON.stringify(output, null, "\t")}; | ||
`.trim() | ||
); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to be careful about injection when writing values to env in GitHub Actions - but I think it's fine in this case 👍 (no user-provided ref, branch name, or file name in the vars)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
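I'm aware of this class of attack. In fact, that's why I always pass variables through env first and then read the environment variables in the code. That way, even if a user tries to inject something via ${{ steps.changes.outputs.NEW_BRANCH }}, for example, the value is still contained inside a string.
In other words, a bad approach would be to interpolate the expression directly into the script:
const newBranch = '${{ steps.changes.outputs.NEW_BRANCH }}';
Good approach: pass the value via env and read it at runtime:
const newBranch = process.env.NEW_BRANCH;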
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Please note that the above is only an example. Indeed, none of the variables in the script are user-controllable, as you pointed out.)