
Commit 6b5a43e

Merge pull request #209 from DefangLabs/ollama

Ollama

2 parents 30e09cc + 3c9fa78, commit 6b5a43e

27 files changed: +7453 −4 lines
Lines changed: 1 addition & 0 deletions

FROM mcr.microsoft.com/devcontainers/python:3.12-bookworm
Lines changed: 11 additions & 0 deletions

{
  "build": {
    "dockerfile": "Dockerfile",
    "context": ".."
  },
  "features": {
    "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
    "ghcr.io/devcontainers/features/node:1": {}
  }
}
Lines changed: 20 additions & 0 deletions

name: Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Deploy
        uses: DefangLabs/[email protected]

samples/ollama/README.md

Lines changed: 47 additions & 0 deletions

# Ollama

[![1-click-deploy](https://defang.io/deploy-with-defang.png)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-ollama-template%26template_owner%3DDefangSamples)

This sample demonstrates how to deploy [Ollama](https://ollama.com/) with Defang, along with a Next.js frontend using the [AI SDK](https://www.npmjs.com/package/ai) for smooth streaming conversations. By default it runs a very small model (`llama3.2:1b`) that performs well on just a CPU, but the compose file includes lines you can uncomment to enable GPU support and run a larger model like `gemma:7b`. If you want to deploy to a GPU-powered instance, you will need to use your own AWS account with [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc).

## Prerequisites

1. Download the [Defang CLI](https://github.com/DefangLabs/defang)
2. (Optional) If you are using Defang BYOC, make sure you are [authenticated with your AWS account](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html)
3. (Optional, for local development) [Docker CLI](https://docs.docker.com/engine/install/)

## Development

To run the application locally, use the following command:

```bash
docker compose -f compose.dev.yaml up
```

## Deployment

> [!NOTE]
> Download the [Defang CLI](https://github.com/DefangLabs/defang)

### Defang Playground

Deploy your application to the Defang Playground by opening up your terminal and typing `defang up`.

**Keep in mind that the Playground does not support GPU instances.**

### BYOC (AWS)

If you want to deploy to your own cloud account, you can use Defang BYOC:

1. [Authenticate your AWS account](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) and make sure you have properly set environment variables such as `AWS_PROFILE`, `AWS_REGION`, `AWS_ACCESS_KEY_ID`, and `AWS_SECRET_ACCESS_KEY`.
2. Run `defang up` in a terminal that has access to your AWS environment variables.

---

Title: Ollama

Short Description: Ollama is a tool that lets you easily run large language models.

Tags: AI, LLM, ML, Llama, Mistral, Next.js, AI SDK

Languages: TypeScript
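
The Next.js UI that pairs with this README is part of the commit but is not shown in this excerpt. As a minimal sketch of how such a frontend could forward chat requests to the Ollama service (the `app/api/chat/route.ts` path and the pass-through approach are assumptions, not the actual code from this commit), a route handler might look like:

```ts
// Hypothetical app/api/chat/route.ts -- a sketch only, not code from this commit.
// It forwards the chat history to the Ollama service and streams the reply back.

export async function POST(req: Request): Promise<Response> {
  // Chat history sent by the browser, e.g. [{ role: "user", content: "Hi" }]
  const { messages } = await req.json();

  // OLLAMA_ENDPOINT and LOAD_MODEL are set on the ui service in compose.yaml below.
  const upstream = await fetch(`${process.env.OLLAMA_ENDPOINT}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: process.env.LOAD_MODEL,
      messages,
      stream: true, // Ollama replies with newline-delimited JSON chunks
    }),
  });

  // Pass the streamed body straight through so tokens reach the browser as they arrive.
  return new Response(upstream.body, {
    headers: { "Content-Type": "application/x-ndjson" },
  });
}
```

The AI SDK mentioned above provides higher-level streaming helpers; this sketch only shows the underlying idea of proxying to the Ollama service using the `OLLAMA_ENDPOINT` and `LOAD_MODEL` values set in `compose.yaml` below.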

samples/ollama/compose.dev.yaml

Lines changed: 15 additions & 0 deletions

services:
  ollama:
    extends:
      file: compose.yaml
      service: ollama

  ui:
    extends:
      file: compose.yaml
      service: ui
    volumes:
      - type: bind
        source: ./ui
        target: /app
    command: ["npm", "run", "dev"]

samples/ollama/compose.yaml

Lines changed: 52 additions & 0 deletions

services:
  ollama:
    # Uncomment to add your own domain
    # domainname: example.com
    build:
      context: ./ollama
      dockerfile: Dockerfile
    shm_size: "16gb"
    ports:
      - target: 8000
        mode: host
    deploy:
      resources:
        reservations:
          cpus: '2.0'
          memory: 8192M
          # Uncomment the next two lines to enable GPU support, for example to use gemma:7b
          # NOTE: this is only supported in [BYOC](https://docs.defang.io/docs/concepts/defang-byoc)
          # devices:
          #   - capabilities: ["gpu"]
    # By default we load llama3.2:1b because it can run efficiently on a CPU, but you can select
    # a different model by setting the LOAD_MODEL environment variable. Check the [list](https://ollama.com/library)
    # for more models. For example, to load gemma:7b, set the LOAD_MODEL environment variable to gemma:7b below.
    environment:
      - LOAD_MODEL=llama3.2:1b
      # - LOAD_MODEL=gemma:7b
    healthcheck:
      # wget or curl required for healthchecks on services with a published port
      # this gets parsed by Defang and provided to the load balancers as well
      test: [ "CMD", "curl", "-s", "http://localhost:8000/" ]

  ui:
    build:
      context: ui
      dockerfile: Dockerfile
    ports:
      - mode: ingress
        target: 3000
        published: 3000
    deploy:
      resources:
        reservations:
          memory: 256M
    healthcheck:
      # wget or curl required for healthchecks on services with a published port
      # this gets parsed by Defang and provided to the load balancers as well
      test: [ "CMD", "curl", "-s", "http://localhost:3000/" ]
    environment:
      - OLLAMA_ENDPOINT=http://ollama:8000
      # Make sure LOAD_MODEL is the same as in the ollama service
      - LOAD_MODEL=llama3.2:1b
      # - LOAD_MODEL=gemma:7b
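
The `ui` service above receives `OLLAMA_ENDPOINT` and `LOAD_MODEL` so the frontend knows where and what the model is. As a purely illustrative sketch of how a browser-side helper might consume the token stream coming back through such a frontend (the `/api/chat` route, function names, and chunk handling are assumptions, not code from this commit):

```ts
// Hypothetical client-side helper -- a sketch, not code from this commit.
// Reads the newline-delimited JSON stream produced by a route handler like the
// one sketched earlier and invokes a callback per token so the UI can render it.

export async function streamChat(
  messages: { role: string; content: string }[],
  onToken: (token: string) => void,
): Promise<void> {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages }),
  });
  if (!res.body) throw new Error("Response has no body to stream");

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // Each complete line is one JSON chunk, e.g. {"message":{"content":"..."},"done":false}
    const lines = buffer.split("\n");
    buffer = lines.pop() ?? "";
    for (const line of lines) {
      if (!line.trim()) continue;
      const chunk = JSON.parse(line);
      if (chunk.message?.content) onToken(chunk.message.content);
    }
  }
}
```

Parsing line by line matches Ollama's newline-delimited JSON streaming format, which is what lets the conversation render token by token rather than all at once.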

samples/ollama/ollama/.dockerignore

Whitespace-only changes.

samples/ollama/ollama/.gitignore

Whitespace-only changes.

samples/ollama/ollama/Dockerfile

Lines changed: 21 additions & 0 deletions

FROM debian:bookworm-slim

# Install curl (used to wait for the server in run.sh and for the compose healthcheck)
RUN apt-get update && apt-get install -y curl && apt-get clean

# Define build-time arguments with default values
ARG OLLAMA_HOST=0.0.0.0:8000
ARG OLLAMA_MODELS=/usr/share/ollama/.ollama/models

# Use the build-time arguments to set environment variables
ENV OLLAMA_HOST=${OLLAMA_HOST}
ENV OLLAMA_MODELS=${OLLAMA_MODELS}
ENV LOAD_MODEL=llama3.2:1b

# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh

WORKDIR /app

COPY ./run.sh ./run.sh

CMD ["./run.sh"]

samples/ollama/ollama/run.sh

Lines changed: 18 additions & 0 deletions

#!/bin/bash

# Start the Ollama server in the background and capture its log
nohup ollama serve >ollama.log 2>&1 &
# Wait until the server answers on port 8000 (retries while the connection is refused)
time curl --retry 5 --retry-connrefused --retry-delay 0 -sf http://localhost:8000

echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo "@ Loading model $LOAD_MODEL @"
echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"

# If $LOAD_MODEL is not set, crash the container
if [ -z "$LOAD_MODEL" ]; then
  echo "LOAD_MODEL is not set. Exiting..."
  exit 1
fi

# Pull the requested model, then keep the container alive by following the server log
ollama pull "$LOAD_MODEL"

tail -f ollama.log
