Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,22 +93,32 @@ Common contribution types include: `doc`, `code`, `bug`, and `ideas`. See the fu

## Development environment

We use the ["scripts to rule them all"](https://github.blog/engineering/engineering-principles/scripts-to-rule-them-all/) philosophy to manage common tasks across the project. These are mostly backed by a Makefile that contains the implementation.

You'll need the following dependencies installed to build Cog locally:
- [Go](https://golang.org/doc/install): We're targeting 1.24, but you can install the latest version since Go is backwards compatible. If you're using a newer Mac with an M1 chip, be sure to download the `darwin-arm64` installer package. Alternatively you can run `brew install go` which will automatically detect and use the appropriate installer for your system architecture.
- [uv](https://docs.astral.sh/uv/): Python versions and dependencies are managed by uv.

- [Go](https://golang.org/doc/install): We're targeting 1.23, but you can install the latest version since Go is backwards compatible. If you're using a newer Mac with an M1 chip, be sure to download the `darwin-arm64` installer package. Alternatively you can run `brew install go` which will automatically detect and use the appropriate installer for your system architecture.
- [uv](https://docs.astral.sh/uv/): Python versions and dependencies are managed by uv, both in development and container environments.
- [Docker](https://docs.docker.com/desktop) or [OrbStack](https://orbstack.dev)

Install the Python dependencies:

script/setup

Once you have Go installed you can install the cog binary by running:
Once you have Go installed, run:

make install

This will build and install the `cog` binary to `/usr/local/bin/cog`. You can then use it to build and run models.

## Package Management

Cog uses [uv](https://docs.astral.sh/uv/) for Python package management, both in development and container environments. This provides:

make install PREFIX=$(go env GOPATH)
- Fast, reliable package installation
- Consistent dependency resolution
- Efficient caching
- Reproducible builds

This installs the `cog` binary to `$GOPATH/bin/cog`.
When building containers, uv is automatically installed and used to install Python packages from requirements.txt files. The cache is mounted at `/srv/r8/uv/cache` to speed up subsequent builds.

To run ALL the tests:

Expand Down
33 changes: 16 additions & 17 deletions pkg/dockerfile/standard_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func NewStandardGenerator(config *config.Config, dir string, command command.Com
Config: config,
Dir: dir,
GOOS: runtime.GOOS,
GOARCH: runtime.GOOS,
GOARCH: runtime.GOARCH,
tmpDir: tmpDir,
relativeTmpDir: relativeTmpDir,
fileWalker: filepath.Walk,
Expand Down Expand Up @@ -414,18 +414,17 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update -qq &
git \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
` + fmt.Sprintf(`

ENV UV_CACHE_DIR="/srv/r8/uv/cache"
RUN --mount=type=cache,target=/root/.cache/pip curl -s -S -L https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer | bash && \
git clone https://github.com/momo-lab/pyenv-install-latest.git "$(pyenv root)"/plugins/pyenv-install-latest && \
export PYTHON_CONFIGURE_OPTS='--enable-optimizations --with-lto' && \
export PYTHON_CFLAGS='-O3' && \
pyenv install-latest "%s" && \
pyenv global $(pyenv install-latest --print "%s") && \
pip install "wheel<1"`, py, py) + `
pyenv install-latest "` + py + `" && \
pyenv global $(pyenv install-latest --print "` + py + `") && \
curl -LsSf https://astral.sh/uv/install.sh | sh

RUN rm -rf /usr/bin/python3 && ln -s ` + "`realpath \\`pyenv which python\\`` /usr/bin/python3 && chmod +x /usr/bin/python3", nil
// for sitePackagesLocation, kind of need to determine which specific version latest is (3.8 -> 3.8.17 or 3.8.18)
// install-latest essentially does pyenv install --list | grep $py | tail -1
// there are many bad options, but a symlink to $(pyenv prefix) is the least bad one
}

func (g *StandardGenerator) installCog() (string, error) {
Expand All @@ -451,7 +450,7 @@ func (g *StandardGenerator) installCog() (string, error) {
cmds := []string{
"ENV R8_COG_VERSION=coglet",
"ENV R8_PYTHON_VERSION=" + g.Config.Build.PythonVersion,
"RUN pip install " + m.LatestCoglet.URL,
"RUN --mount=type=cache,target=/srv/r8/uv/cache,id=uv-cache uv pip install " + m.LatestCoglet.URL,
}
return strings.Join(cmds, "\n"), nil
}
Expand All @@ -469,13 +468,13 @@ func (g *StandardGenerator) installCog() (string, error) {
if err != nil {
return "", err
}
pipInstallLine := "RUN --mount=type=cache,target=/root/.cache/pip pip install --no-cache-dir"
pipInstallLine += " " + containerPath
pipInstallLine += " 'pydantic>=1.9,<3'"
uvInstallLine := "RUN --mount=type=cache,target=/srv/r8/uv/cache,id=uv-cache uv pip install --no-cache-dir"
uvInstallLine += " " + containerPath
uvInstallLine += " 'pydantic>=1.9,<3'"
if g.strip {
pipInstallLine += " && " + StripDebugSymbolsCommand
uvInstallLine += " && " + StripDebugSymbolsCommand
}
lines = append(lines, CFlags, pipInstallLine, "ENV CFLAGS=")
lines = append(lines, CFlags, uvInstallLine, "ENV CFLAGS=")
return strings.Join(lines, "\n"), nil
}

Expand Down Expand Up @@ -509,14 +508,14 @@ func (g *StandardGenerator) pipInstalls() (string, error) {
return "", err
}

pipInstallLine := "RUN --mount=type=cache,target=/root/.cache/pip pip install -r " + containerPath
uvInstallLine := "RUN --mount=type=cache,target=/srv/r8/uv/cache,id=uv-cache uv pip install -r " + containerPath
if g.strip {
pipInstallLine += " && " + StripDebugSymbolsCommand
uvInstallLine += " && " + StripDebugSymbolsCommand
}
return strings.Join([]string{
copyLine[0],
CFlags,
pipInstallLine,
uvInstallLine,
"ENV CFLAGS=",
}, "\n"), nil
}
Expand Down
61 changes: 58 additions & 3 deletions pkg/dockerfile/standard_generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func testInstallCog(relativeTmpDir string, stripped bool) string {
}
return fmt.Sprintf(`COPY %s/%s /tmp/%s
ENV CFLAGS="-O3 -funroll-loops -fno-strict-aliasing -flto -S"
RUN --mount=type=cache,target=/root/.cache/pip pip install --no-cache-dir /tmp/%s 'pydantic>=1.9,<3'%s
RUN --mount=type=cache,target=/srv/r8/uv/cache,id=uv-cache uv pip install --no-cache-dir /tmp/%s 'pydantic>=1.9,<3'%s
ENV CFLAGS=`, relativeTmpDir, wheel, wheel, wheel, strippedCall)
}

Expand All @@ -73,13 +73,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update -qq &
git \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*

ENV UV_CACHE_DIR="/srv/r8/uv/cache"
RUN --mount=type=cache,target=/root/.cache/pip curl -s -S -L https://raw.githubusercontent.com/pyenv/pyenv-installer/master/bin/pyenv-installer | bash && \
git clone https://github.com/momo-lab/pyenv-install-latest.git "$(pyenv root)"/plugins/pyenv-install-latest && \
export PYTHON_CONFIGURE_OPTS='--enable-optimizations --with-lto' && \
export PYTHON_CFLAGS='-O3' && \
pyenv install-latest "%s" && \
pyenv global $(pyenv install-latest --print "%s") && \
pip install "wheel<1"
curl -LsSf https://astral.sh/uv/install.sh | sh
`, version, version)
}

Expand Down Expand Up @@ -414,7 +416,7 @@ ENV NVIDIA_DRIVER_CAPABILITIES=all
` + testInstallPython("3.12") + `RUN rm -rf /usr/bin/python3 && ln -s ` + "`realpath \\`pyenv which python\\`` /usr/bin/python3 && chmod +x /usr/bin/python3" + `
COPY ` + gen.relativeTmpDir + `/requirements.txt /tmp/requirements.txt
ENV CFLAGS="-O3 -funroll-loops -fno-strict-aliasing -flto -S"
RUN --mount=type=cache,target=/root/.cache/pip pip install -r /tmp/requirements.txt
RUN --mount=type=cache,target=/srv/r8/uv/cache,id=uv-cache uv pip install -r /tmp/requirements.txt
ENV CFLAGS=
` + testInstallCog(gen.relativeTmpDir, gen.strip) + `
RUN find / -type f -name "*python*.so" -printf "%h\n" | sort -u > /etc/ld.so.conf.d/cog.conf && ldconfig
Expand Down Expand Up @@ -898,3 +900,56 @@ torch==2.3.1
pandas==2.0.3
coglet @ https://github.com/replicate/cog-runtime/releases/download/v0.1.0-alpha31/coglet-0.1.0a31-py3-none-any.whl`, string(requirements))
}

func TestGenerateDockerfileStripped(t *testing.T) {
tmpDir := t.TempDir()

conf, err := config.FromYAML([]byte(`
build:
gpu: true
cuda: "11.8"
python_version: "3.12"
system_packages:
- ffmpeg
- cowsay
python_packages:
- torch==2.3.1
- pandas==2.0.3
run:
- "cowsay moo"
predict: predict.py:Predictor
`))
require.NoError(t, err)
require.NoError(t, conf.ValidateAndComplete(""))
command := dockertest.NewMockCommand()
client := registrytest.NewMockRegistryClient()
gen, err := NewStandardGenerator(conf, tmpDir, command, client, true)
require.NoError(t, err)
gen.SetUseCogBaseImage(true)
gen.SetStrip(true)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights(t.Context(), "r8.im/replicate/cog-test")
require.NoError(t, err)

expected := `#syntax=docker/dockerfile:1.4
FROM r8.im/replicate/cog-test-weights AS weights
FROM r8.im/cog-base:cuda11.8-python3.12-torch2.3.1
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update -qq && apt-get install -qqy cowsay && rm -rf /var/lib/apt/lists/*
COPY ` + gen.relativeTmpDir + `/requirements.txt /tmp/requirements.txt
ENV CFLAGS="-O3 -funroll-loops -fno-strict-aliasing -flto -S"
RUN --mount=type=cache,target=/srv/r8/uv/cache,id=uv-cache uv pip install -r /tmp/requirements.txt && find / -type f -name "*python*.so" -not -name "*cpython*.so" -exec strip -S {} \;
ENV CFLAGS=
RUN find / -type f -name "*.py[co]" -delete && find / -type f -name "*.py" -exec touch -t 197001010000 {} \; && find / -type f -name "*.py" -printf "%h\n" | sort -u | /usr/bin/python3 -m compileall --invalidation-mode timestamp -o 2 -j 0
RUN cowsay moo
WORKDIR /src
EXPOSE 5000
CMD ["python", "-m", "cog.server.http"]
COPY . /src`

require.Equal(t, expected, actual)

requirements, err := os.ReadFile(path.Join(gen.tmpDir, "requirements.txt"))
require.NoError(t, err)
require.Equal(t, `--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.3.1
pandas==2.0.3`, string(requirements))
}
89 changes: 65 additions & 24 deletions python/cog/server/webhook.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Set

import requests
Expand All @@ -16,6 +17,10 @@
log = structlog.get_logger(__name__)

_response_interval = float(os.environ.get("COG_THROTTLE_RESPONSE_INTERVAL", 0.5))
_webhook_timeout = float(
os.environ.get("COG_WEBHOOK_TIMEOUT", 10.0)
) # 10 second timeout by default
_webhook_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="webhook")

# HACK: signal that we should skip the start webhook when the response interval
# is tuned below 100ms. This should help us get output sooner for models that
Expand All @@ -27,11 +32,40 @@ def webhook_caller_filtered(
webhook: str,
webhook_events_filter: Set[WebhookEvent],
) -> Callable[[Any, WebhookEvent], None]:
upstream_caller = webhook_caller(webhook)
# Create a session for this webhook
default_session = requests_session()
retry_session = requests_session_with_retries()
throttler = ResponseThrottler(response_interval=_response_interval)

def _send_webhook(response: PredictionResponse, session: requests.Session) -> None:
if PYDANTIC_V2:
dict_response = jsonable_encoder(response.model_dump(exclude_unset=True))
else:
dict_response = jsonable_encoder(response.dict(exclude_unset=True))

try:
session.post(webhook, json=dict_response, timeout=_webhook_timeout)
except requests.exceptions.Timeout:
log.warn("webhook request timed out", webhook=webhook)
except requests.exceptions.RequestException:
log.warn("caught exception while sending webhook", exc_info=True)

def caller(response: PredictionResponse, event: WebhookEvent) -> None:
if event in webhook_events_filter:
upstream_caller(response)
if event not in webhook_events_filter:
return

if not throttler.should_send_response(response):
return

# Use a separate thread for webhook calls to avoid blocking
if Status.is_terminal(response.status):
# For terminal updates, retry persistently but in background
_webhook_executor.submit(_send_webhook, response, retry_session)
else:
# For other requests, don't retry, and ignore any errors
_webhook_executor.submit(_send_webhook, response, default_session)

throttler.update_last_sent_response_time()

return caller

Expand All @@ -44,24 +78,32 @@ def webhook_caller(webhook: str) -> Callable[[Any], None]:
default_session = requests_session()
retry_session = requests_session_with_retries()

def _send_webhook(response: PredictionResponse, session: requests.Session) -> None:
if PYDANTIC_V2:
dict_response = jsonable_encoder(response.model_dump(exclude_unset=True))
else:
dict_response = jsonable_encoder(response.dict(exclude_unset=True))

try:
session.post(webhook, json=dict_response, timeout=_webhook_timeout)
except requests.exceptions.Timeout:
log.warn("webhook request timed out", webhook=webhook)
except requests.exceptions.RequestException:
log.warn("caught exception while sending webhook", exc_info=True)

def caller(response: PredictionResponse) -> None:
if throttler.should_send_response(response):
if PYDANTIC_V2:
dict_response = jsonable_encoder(
response.model_dump(exclude_unset=True)
)
else:
dict_response = jsonable_encoder(response.dict(exclude_unset=True))
if Status.is_terminal(response.status):
# For terminal updates, retry persistently
retry_session.post(webhook, json=dict_response)
else:
# For other requests, don't retry, and ignore any errors
try:
default_session.post(webhook, json=dict_response)
except requests.exceptions.RequestException:
log.warn("caught exception while sending webhook", exc_info=True)
throttler.update_last_sent_response_time()
if not throttler.should_send_response(response):
return

# Use a separate thread for webhook calls to avoid blocking
if Status.is_terminal(response.status):
# For terminal updates, retry persistently but in background
_webhook_executor.submit(_send_webhook, response, retry_session)
else:
# For other requests, don't retry, and ignore any errors
_webhook_executor.submit(_send_webhook, response, default_session)

throttler.update_last_sent_response_time()

return caller

Expand All @@ -84,13 +126,12 @@ def requests_session() -> requests.Session:


def requests_session_with_retries() -> requests.Session:
# This session will retry requests up to 12 times, with exponential
# backoff. In total it'll try for up to roughly 320 seconds, providing
# resilience through temporary networking and availability issues.
# This session will retry requests up to 6 times (reduced from 12), with exponential
# backoff. In total it'll try for up to roughly 60 seconds (reduced from 320s).
session = requests_session()
adapter = HTTPAdapter(
max_retries=Retry(
total=12,
total=6, # Reduced from 12 to avoid blocking too long
backoff_factor=0.1,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["POST"],
Expand Down
Loading