Commit 83dafd1: Adding CPU and GPU variant
1 parent 3fe5b9d

File tree: 3 files changed, +49 −1 lines


ai-model/README.md

Lines changed: 12 additions & 0 deletions

@@ -1,3 +1,15 @@
 # Introduction
 
 This is an example of how to run an AI model with [Ollama](https://ollama.com/)
+
+## Running the AI model on the CPU
+
+Just run the `runner.py` and use the `usage_scenario_cpu.yml`.
+
+## Running the AI model on the GPU
+
+You must have the NVIDIA container toolkit for Docker installed and a GPU present in your system.
+
+See https://hub.docker.com/r/ollama/ollama for details.
+
+Then run the `runner.py` and use the `usage_scenario_gpu.yml` with the `--allow-unsafe` flag to mount the GPU into the containers.
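The two invocations described in the README could be sketched as shell commands. This is a hedged sketch, not taken from the commit: the checkout path is hypothetical, and the flag names (`--uri`, `--filename`, `--allow-unsafe`) assume the Green Metrics Tool `runner.py` CLI.

```shell
# Hypothetical path to your local checkout of the examples (assumption).
EXAMPLES=/path/to/example-applications/ai-model

# Optional sanity check that Docker can reach the GPU at all
# (requires the NVIDIA container toolkit to be installed):
docker run --rm --gpus=all ubuntu nvidia-smi

# CPU variant:
python3 runner.py --uri "$EXAMPLES" --filename usage_scenario_cpu.yml

# GPU variant: --allow-unsafe lets docker-run-args (here --gpus=all)
# be passed through to the containers.
python3 runner.py --uri "$EXAMPLES" --filename usage_scenario_gpu.yml --allow-unsafe
```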

ai-model/usage_scenario.yml renamed to ai-model/usage_scenario_cpu.yml

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 ---
 name: AI model
 author: Arne Tarara <[email protected]>
-description: Run an inference with a small AI model
+description: Run an inference with a small AI model on the CPU
 
 compose-file: !include compose.yml
 

ai-model/usage_scenario_gpu.yml

Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+---
+name: AI model
+author: Arne Tarara <[email protected]>
+description: Run an inference with a small AI model on the GPU
+
+compose-file: !include compose.yml
+
+services:
+  gcb-ai-model:
+    docker-run-args:
+      - --gpus=all
+
+flow:
+  - name: Download gemma3-1b
+    container: gcb-ai-model
+    commands:
+      - type: console
+        command: ollama pull gemma3:1b
+        read-notes-stdout: true
+        log-stdout: true
+
+  - name: Load gemma3-1b into memory
+    container: gcb-ai-model
+    commands:
+      - type: console
+        command: ollama run gemma3:1b ""
+        read-notes-stdout: true
+        log-stdout: true
+
+  - name: Run Inference on gemma3-1b
+    container: gcb-ai-model
+    commands:
+      - type: console
+        command: ollama run gemma3:1b "Tell me a long joke?"
+        read-notes-stdout: true
+        log-stdout: true
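The `compose-file: !include compose.yml` line uses a custom YAML tag that standard loaders reject. As a minimal sketch (assumption: the Green Metrics Tool's real loader is more elaborate and does additional validation), an `!include` constructor in Python with PyYAML could look like this:

```python
import os
import tempfile
import yaml  # PyYAML

class IncludeLoader(yaml.SafeLoader):
    """SafeLoader extended with a custom !include tag (illustrative only)."""

def _include(loader, node):
    # Resolve the included path relative to the including file's directory.
    base = os.path.dirname(loader.name)  # loader.name is the stream's file name
    with open(os.path.join(base, loader.construct_scalar(node))) as f:
        return yaml.load(f, IncludeLoader)

IncludeLoader.add_constructor('!include', _include)

# Demo: recreate a compose.yml / usage_scenario pair in a temp dir and load it.
with tempfile.TemporaryDirectory() as d:
    with open(os.path.join(d, 'compose.yml'), 'w') as f:
        f.write('services:\n  gcb-ai-model:\n    image: ollama/ollama\n')
    with open(os.path.join(d, 'usage_scenario_gpu.yml'), 'w') as f:
        f.write('name: AI model\ncompose-file: !include compose.yml\n')
    with open(os.path.join(d, 'usage_scenario_gpu.yml')) as f:
        scenario = yaml.load(f, IncludeLoader)

print(scenario['compose-file']['services']['gcb-ai-model']['image'])  # → ollama/ollama
```

The key design point is that the include is resolved relative to the including file, so scenario files can be moved together with their `compose.yml`.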
