.env: 7 changes (4 additions & 3 deletions)

@@ -159,11 +159,12 @@ LLM_DOCKERFILE=./src/llm/Dockerfile
 LLM_HOST=llm
 LLM_PORT=8000
 
-# Mock LLM - to use a real OpenAI API-compatible LLM,
-# configure the .env.override file
+# LLM Service - uses local SmolLM2-135M by default
+# To use a real OpenAI API-compatible LLM, configure .env.override
 LLM_BASE_URL=http://${LLM_HOST}:${LLM_PORT}/v1
-LLM_MODEL=astronomy-llm
+LLM_MODEL=smollm2-135m
 OPENAI_API_KEY=dummy
+SMOLLM_MODEL_PATH=/app/models/SmolLM2-135M-Instruct-Q4_K_M.gguf
 
 # Valkey
 VALKEY_PORT=6379
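Because LLM_BASE_URL points at an OpenAI API-compatible endpoint, callers need no code changes when switching between the local SmolLM2 server and a real provider. A minimal sketch of a client call using these variables, assuming the standard `openai` Python package (the demo's actual callers are not shown in this diff):

```python
import os

from openai import OpenAI

# Reads the same variables the .env file defines; the "dummy" key is
# accepted because the local server does not authenticate requests.
client = OpenAI(
    base_url=os.environ["LLM_BASE_URL"],   # http://llm:8000/v1
    api_key=os.environ["OPENAI_API_KEY"],  # "dummy" for the local server
)

response = client.chat.completions.create(
    model=os.environ["LLM_MODEL"],  # smollm2-135m by default
    messages=[{"role": "user", "content": "Summarize the reviews for this telescope."}],
)
print(response.choices[0].message.content)
```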
.env.override: 6 changes (2 additions & 4 deletions)

@@ -2,10 +2,8 @@
 # PLACE YOUR .env ENVIRONMENT VARIABLES OVERRIDES IN THIS FILE
 
 
-# To use a real OpenAI API-compatible LLM,
-# set the appropriate values for the target LLM
-# Required permissions:
-# [Models and Model capabilities]
+# To use a real OpenAI API-compatible LLM instead of the built-in SmolLM2,
+# uncomment and set ALL three values below:
 
 #LLM_BASE_URL=https://api.openai.com/v1
 #LLM_MODEL=gpt-4o-mini
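For example, pointing the demo at OpenAI itself would mean an .env.override along these lines (the key shown is a placeholder, not a real credential):

```
LLM_BASE_URL=https://api.openai.com/v1
LLM_MODEL=gpt-4o-mini
OPENAI_API_KEY=sk-your-key-here
```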
CHANGELOG.md: 2 changes (2 additions & 0 deletions)

@@ -7,6 +7,8 @@ the release.
 
 ## Unreleased
 
+* [llm] Replace mock LLM with local SmolLM2-135M inference via llama-cpp-python
+  ([#3014](https://github.com/open-telemetry/opentelemetry-demo/pull/3014))
 * [accounting] fix memory leak with dbcontext
   ([#2876](https://github.com/open-telemetry/opentelemetry-demo/pull/2876))
 * [chore] Upgrade OTel Collector to v0.145.0 with :warning: breaking change:
docker-compose.minimal.yml: 9 changes (8 additions & 1 deletion)

@@ -612,11 +612,18 @@ services:
     deploy:
       resources:
         limits:
-          memory: 50M
+          memory: 150M
+    restart: unless-stopped
     environment:
       - FLAGD_HOST
       - FLAGD_PORT
       - LLM_PORT
+      - LLM_MODEL
+      - SMOLLM_MODEL_PATH
+      - OTEL_EXPORTER_OTLP_ENDPOINT
+      - OTEL_SERVICE_NAME=llm
+      - OTEL_RESOURCE_ATTRIBUTES
+      - OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
     ports:
       - "${LLM_PORT}"
     logging: *logging
docker-compose.yml: 11 changes (10 additions & 1 deletion)

@@ -763,16 +763,25 @@ services:
     deploy:
       resources:
         limits:
-          memory: 50M
+          memory: 150M
+    restart: unless-stopped
     environment:
       - FLAGD_HOST
       - FLAGD_PORT
       - LLM_PORT
+      - LLM_MODEL
+      - SMOLLM_MODEL_PATH
+      - OTEL_EXPORTER_OTLP_ENDPOINT
+      - OTEL_SERVICE_NAME=llm
+      - OTEL_RESOURCE_ATTRIBUTES
+      - OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
     ports:
       - "${LLM_PORT}"
     depends_on:
       flagd:
         condition: service_started
+      otel-collector:
+        condition: service_started
     logging: *logging
 
   # Postgresql used by Accounting service
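With the llm service now speaking the OpenAI wire protocol on LLM_PORT, a quick smoke test is possible from the host. Both compose files publish the container port to an ephemeral host port (`ports: - "${LLM_PORT}"` has no fixed host side), so check the mapping first with `docker compose port llm 8000`. A sketch, assuming it maps to localhost:8000 and that the server exposes the standard /v1/chat/completions route implied by LLM_BASE_URL:

```python
import json
import urllib.request

# Hypothetical smoke test; replace 8000 with the host port that
# `docker compose port llm 8000` reports.
req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps({
        "model": "smollm2-135m",
        "messages": [{"role": "user", "content": "Say hello."}],
    }).encode(),
    headers={"Content-Type": "application/json",
             "Authorization": "Bearer dummy"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```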
src/llm/Dockerfile: 31 changes (21 additions & 10 deletions)

@@ -1,26 +1,37 @@
 # Copyright The OpenTelemetry Authors
 # SPDX-License-Identifier: Apache-2.0
 
-FROM docker.io/library/python:3.12-alpine3.22 AS build-venv
-
-RUN apk update && \
-    apk add gcc g++ linux-headers
+# Build stage: compile llama-cpp-python and download model
+FROM docker.io/library/python:3.12-slim-bookworm AS build
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends build-essential g++ curl git && \
+    rm -rf /var/lib/apt/lists/*
 
 COPY ./src/llm/requirements.txt requirements.txt
 
-RUN python -m venv venv && \
-    venv/bin/pip install --no-cache-dir -r requirements.txt
+ENV CMAKE_ARGS="-DGGML_NATIVE=OFF" CMAKE_BUILD_PARALLEL_LEVEL=2
+RUN python -m venv /venv && \
+    /venv/bin/pip install --no-cache-dir -r requirements.txt
+
+# Download SmolLM2-135M-Instruct Q4_K_M GGUF (~105MB)
+RUN mkdir -p /models && \
+    curl -L -o /models/SmolLM2-135M-Instruct-Q4_K_M.gguf \
+    "https://huggingface.co/lmstudio-community/SmolLM2-135M-Instruct-GGUF/resolve/main/SmolLM2-135M-Instruct-Q4_K_M.gguf"
 
-FROM docker.io/library/python:3.12-alpine3.22
-
-COPY --from=build-venv /venv/ /venv/
+# Runtime stage
+FROM docker.io/library/python:3.12-slim-bookworm
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends libgomp1 && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /venv/ /venv/
+COPY --from=build /models/ /app/models/
 
 WORKDIR /app
 
 COPY ./src/llm/app.py app.py
 COPY ./src/llm/product-review-summaries/product-review-summaries.json product-review-summaries.json
 COPY ./src/llm/product-review-summaries/inaccurate-product-review-summaries.json inaccurate-product-review-summaries.json
 
 EXPOSE ${LLM_PORT}
 ENTRYPOINT [ "/venv/bin/python", "app.py" ]
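The runtime image thus ships both the virtualenv and the quantized GGUF at the path SMOLLM_MODEL_PATH points to (CMAKE_ARGS="-DGGML_NATIVE=OFF" keeps the compiled wheel portable across CPUs, and libgomp1 supplies the OpenMP runtime it links against). A minimal sketch of the inference path, assuming llama-cpp-python's standard API; the PR's actual app.py is not included in this excerpt, and the context size here is an assumption:

```python
import os

from llama_cpp import Llama

# Load the quantized model baked into the image; the path and model
# name come from the env vars defined in .env.
llm = Llama(
    model_path=os.environ["SMOLLM_MODEL_PATH"],
    n_ctx=2048,       # assumed context window; not specified in the diff
    verbose=False,
)

# llama-cpp-python provides an OpenAI-style chat completion helper,
# which is how the service can keep serving the /v1 protocol that
# existing callers expect.
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Summarize: great scope, sharp optics."}],
    max_tokens=128,
)
print(result["choices"][0]["message"]["content"])
```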