Skip to content

Commit 64dfecc

Browse files
committed
added docker runner and reduced persisted files
1 parent a8e1301 commit 64dfecc

File tree

7 files changed

+162
-140
lines changed

7 files changed

+162
-140
lines changed

.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.git
2+
.gitignore
3+
tracker-data
4+
*.log
5+
docker-compose.yml

.github/workflows/build.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
name: Build
2+
3+
on:
4+
push:
5+
branches: [main]
6+
tags: ['v*']
7+
pull_request:
8+
branches: [main]
9+
10+
permissions:
11+
contents: write
12+
packages: write
13+
14+
jobs:
15+
docker:
16+
runs-on: ubuntu-latest
17+
steps:
18+
- uses: actions/checkout@v5
19+
20+
- uses: docker/setup-buildx-action@v4
21+
22+
- name: Log in to GitHub Container Registry
23+
if: github.event_name != 'pull_request'
24+
uses: docker/login-action@v4
25+
with:
26+
registry: ghcr.io
27+
username: ${{ github.actor }}
28+
password: ${{ secrets.GITHUB_TOKEN }}
29+
30+
- name: Docker metadata
31+
id: meta
32+
uses: docker/metadata-action@v6
33+
with:
34+
images: ghcr.io/${{ github.repository }}
35+
tags: |
36+
type=ref,event=branch
37+
type=semver,pattern={{version}}
38+
type=semver,pattern={{major}}.{{minor}}
39+
type=sha
40+
41+
- name: Build and push
42+
uses: docker/build-push-action@v7
43+
with:
44+
context: .
45+
push: ${{ github.event_name != 'pull_request' }}
46+
tags: ${{ steps.meta.outputs.tags }}
47+
labels: ${{ steps.meta.outputs.labels }}
48+
cache-from: type=gha
49+
cache-to: type=gha,mode=max

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ ipfs-archive-tracker
1313
*.tmp
1414

1515
# local docs and notes
16-
docs
16+
docs
17+
docker-compose.yml

Dockerfile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
FROM golang:1.25-alpine AS build
2+
WORKDIR /src
3+
COPY go.mod go.sum ./
4+
RUN go mod download
5+
COPY . .
6+
RUN CGO_ENABLED=0 go build -o /ipfs-archive-tracker .
7+
8+
FROM alpine:3.21
9+
RUN apk add --no-cache ca-certificates poppler-utils
10+
COPY --from=build /ipfs-archive-tracker /usr/local/bin/ipfs-archive-tracker
11+
EXPOSE 8384 8385
12+
ENTRYPOINT ["ipfs-archive-tracker"]

README.md

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,33 @@
1111
go build -o ipfs-archive-tracker .
1212
```
1313

14+
## Docker / Compose
15+
16+
A pre-built image is published to GHCR on every push to `main` and on version tags:
17+
18+
```sh
19+
docker pull ghcr.io/gipplab/ipfs-archive-tracker:main
20+
```
21+
22+
Or build locally:
23+
24+
```sh
25+
docker build -t ipfs-archive-tracker .
26+
```
27+
28+
Run the container (persisting data and exposing only the public API):
29+
30+
```sh
31+
docker run -d --name ipfs-archive-tracker \
32+
-p 8385:8385 \
33+
-v "$(pwd)/tracker-data:/data" \
34+
-v "$(pwd)/.api_key:/data/.api_key:ro" \
35+
ghcr.io/gipplab/ipfs-archive-tracker:main \
36+
-o /data -public-port 8385 -port 8384
37+
```
38+
39+
See `docker-compose.yml` for a full Compose example. If Kubo runs in another container/network, set `-kubo` to that service URL instead.
40+
1441
## API key
1542

1643
Indexing needs an API key: `.api_key` in `-o` or cwd, or `SAIA_API_KEY`.
@@ -30,22 +57,24 @@ Two servers by default: internal (Web UI: 127.0.0.1:8384) and public (Archives A
3057
./ipfs-archive-tracker -o ./index-data -port 9000 -public-port 9001
3158
```
3259

33-
## Flags
34-
35-
| Flag | Default | Description |
36-
|------|---------|-------------|
37-
| `-o` | `.` | Output directory for index and data files |
38-
| `-gateway` | `https://ipfs.io` | IPFS gateway base URL |
39-
| `-kubo` | `http://localhost:5001` | Kubo API URL for IPNS resolution |
40-
| `-workers` | `4` | Number of concurrent processing workers |
41-
| `-model` | `meta-llama-3.1-8b-instruct` | LLM model for keyword extraction |
42-
| `-fallback-model` | `llama-3.3-70b-instruct` | Model to try if primary returns 429 |
43-
| `-api-base` | `https://chat-ai.academiccloud.de/v1` | OpenAI-compatible API base URL |
44-
| `-spacing` | `100ms` | Minimum delay between dispatching CIDs |
45-
| `-cli` | `false` | Index pending CIDs from archives and exit (no web UI) |
46-
| `-port` | `8384` | Web UI port (localhost only) |
47-
| `-public-port` | `8385` | Public API port (archives only, bind all interfaces) |
48-
| `-refresh` | `10m` | Interval to refresh IPNS for all archives (0 to disable) |
60+
## Configuration
61+
62+
All settings can be provided as CLI flags or environment variables. Flags take precedence.
63+
64+
| Flag | Env var | Default | Description |
65+
|------|---------|---------|-------------|
66+
| `-o` | `TRACKER_DATA_DIR` | `.` | Output directory for index files |
67+
| `-gateway` | `TRACKER_GATEWAY` | `https://ipfs.io` | IPFS gateway base URL |
68+
| `-kubo` | `KUBO_API` | `http://localhost:5001` | Kubo API URL for IPNS resolution |
69+
| `-workers` | `TRACKER_WORKERS` | `4` | Number of concurrent processing workers |
70+
| `-model` | `TRACKER_MODEL` | `meta-llama-3.1-8b-instruct` | LLM model for keyword extraction |
71+
| `-fallback-model` | `TRACKER_FALLBACK_MODEL` | `llama-3.3-70b-instruct` | Model to try if primary returns 429 |
72+
| `-api-base` | `TRACKER_API_BASE` | `https://chat-ai.academiccloud.de/v1` | OpenAI-compatible API base URL |
73+
| `-spacing` | `TRACKER_SPACING` | `100ms` | Minimum delay between dispatching CIDs |
74+
| `-cli` | `TRACKER_CLI` | `false` | Index pending CIDs from archives and exit (no web UI) |
75+
| `-port` | `TRACKER_PORT` | `8384` | Web UI port (localhost only) |
76+
| `-public-port` | `TRACKER_PUBLIC_PORT` | `8385` | Public API port (archives only, bind all interfaces) |
77+
| `-refresh` | `TRACKER_REFRESH` | `10m` | Interval to refresh IPNS for all archives (0 to disable) |
4978

5079
With the default `-api-base` (Chat AI / Academic Cloud), rate limits from the API are 1000 req/min, 10000/hour, 50002/day. Current models and exact API IDs: see [docs/chat-ai-api.md](docs/chat-ai-api.md) or `GET https://chat-ai.academiccloud.de/v1/models` (with your API key).
5180

@@ -74,9 +103,7 @@ Expose only port 8385 (not 8384).
74103

75104
| File | Description |
76105
|------|-------------|
77-
| `keyword_index.json` | Indexed metadata keyed by CID |
78-
| `keyword_failures.json` | Permanently failed CIDs with error details |
79-
| `archives.json` | Archive ID → name, CIDs (source of all known CIDs) |
106+
| `keyword_index.json` | Indexed metadata keyed by CID (only persisted file) |
80107

81108
Example entry in `keyword_index.json`:
82109

main.go

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"os"
1212
"os/signal"
1313
"path/filepath"
14+
"strconv"
1415
"strings"
1516
"sync"
1617
"syscall"
@@ -29,20 +30,50 @@ type PipelineConfig struct {
2930
KuboHTTP *http.Client
3031
}
3132

33+
// envStr returns the value of the environment variable named key.
// When the variable is unset or set to the empty string, fallback is
// returned instead.
func envStr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
39+
40+
// envInt returns the environment variable named key parsed as a base-10
// integer.
//
// It returns fallback when the variable is unset or empty. When the variable
// is set but is not a valid integer, fallback is still returned, but a warning
// is written to stderr so that a typo (for example TRACKER_PORT=80a) is
// visible to the operator instead of being silently replaced by the default.
func envInt(key string, fallback int) int {
	v := os.Getenv(key)
	if v == "" {
		return fallback
	}
	n, err := strconv.Atoi(v)
	if err != nil {
		// Best effort: keep running with the default, but say so.
		fmt.Fprintf(os.Stderr, "warning: ignoring invalid integer %s=%q, using default %d\n", key, v, fallback)
		return fallback
	}
	return n
}
48+
49+
// envDuration returns the environment variable named key parsed with
// time.ParseDuration (for example "100ms" or "10m").
//
// It returns fallback when the variable is unset or empty. When the variable
// is set but cannot be parsed (a bare number such as "10" has no unit and is
// rejected by time.ParseDuration), fallback is still returned, but a warning
// is written to stderr so the misconfiguration is not silently hidden.
func envDuration(key string, fallback time.Duration) time.Duration {
	v := os.Getenv(key)
	if v == "" {
		return fallback
	}
	d, err := time.ParseDuration(v)
	if err != nil {
		// Best effort: keep running with the default, but say so.
		fmt.Fprintf(os.Stderr, "warning: ignoring invalid duration %s=%q, using default %v\n", key, v, fallback)
		return fallback
	}
	return d
}
57+
58+
// envBool reports whether the environment variable named key is set to a
// true value. The comparison is case-insensitive: "1" and "true" yield true.
//
// An unset or empty variable, "0" and "false" yield false. Any other value
// (for example "yes" or "on") also yields false, but a warning is written to
// stderr so the operator can see that the setting was not understood rather
// than having it silently ignored.
func envBool(key string) bool {
	raw := os.Getenv(key)
	switch strings.ToLower(raw) {
	case "1", "true":
		return true
	case "", "0", "false":
		return false
	default:
		fmt.Fprintf(os.Stderr, "warning: ignoring unrecognized boolean %s=%q (expected 1/true or 0/false), using false\n", key, raw)
		return false
	}
}
62+
3263
func main() {
3364
var (
34-
outputDir = flag.String("o", ".", "output directory for index and failure files")
35-
gateway = flag.String("gateway", "https://ipfs.io", "IPFS gateway base URL")
36-
kuboAPI = flag.String("kubo", "http://localhost:5001", "Kubo API URL for IPNS resolution")
37-
workers = flag.Int("workers", 4, "number of concurrent processing workers")
38-
model = flag.String("model", defaultModel, "LLM model for keyword extraction")
39-
fallbackModel = flag.String("fallback-model", defaultFallbackModel, "fallback model if primary is rate limited")
40-
apiBase = flag.String("api-base", defaultAPIBase, "OpenAI-compatible API base URL")
41-
spacing = flag.Duration("spacing", 100*time.Millisecond, "minimum delay between dispatching CIDs to workers")
42-
cli = flag.Bool("cli", false, "run in CLI mode (index CIDs and exit, no web UI)")
43-
port = flag.Int("port", defaultPort, "web UI port (localhost only)")
44-
publicPort = flag.Int("public-port", defaultPublicPort, "public API port (archives only, bind all interfaces)")
45-
refreshInterval = flag.Duration("refresh", 10*time.Minute, "interval to refresh IPNS for all archives (0 to disable)")
65+
outputDir = flag.String("o", envStr("TRACKER_DATA_DIR", "."), "output directory for index files")
66+
gateway = flag.String("gateway", envStr("TRACKER_GATEWAY", "https://ipfs.io"), "IPFS gateway base URL")
67+
kuboAPI = flag.String("kubo", envStr("KUBO_API", "http://localhost:5001"), "Kubo API URL for IPNS resolution")
68+
workers = flag.Int("workers", envInt("TRACKER_WORKERS", 4), "number of concurrent processing workers")
69+
model = flag.String("model", envStr("TRACKER_MODEL", defaultModel), "LLM model for keyword extraction")
70+
fallbackModel = flag.String("fallback-model", envStr("TRACKER_FALLBACK_MODEL", defaultFallbackModel), "fallback model if primary is rate limited")
71+
apiBase = flag.String("api-base", envStr("TRACKER_API_BASE", defaultAPIBase), "OpenAI-compatible API base URL")
72+
spacing = flag.Duration("spacing", envDuration("TRACKER_SPACING", 100*time.Millisecond), "minimum delay between dispatching CIDs to workers")
73+
cli = flag.Bool("cli", envBool("TRACKER_CLI"), "run in CLI mode (index CIDs and exit, no web UI)")
74+
port = flag.Int("port", envInt("TRACKER_PORT", defaultPort), "web UI port (localhost only)")
75+
publicPort = flag.Int("public-port", envInt("TRACKER_PUBLIC_PORT", defaultPublicPort), "public API port (archives only, bind all interfaces)")
76+
refreshInterval = flag.Duration("refresh", envDuration("TRACKER_REFRESH", 10*time.Minute), "interval to refresh IPNS for all archives (0 to disable)")
4677
)
4778
flag.Usage = func() {
4879
fmt.Fprintf(os.Stderr, "Usage: %s [flags]\n\n", os.Args[0])
@@ -53,6 +84,8 @@ func main() {
5384
fmt.Fprintf(os.Stderr, "Use -cli to index once from archives and exit (no web server).\n\n")
5485
fmt.Fprintf(os.Stderr, "API key (required for indexing):\n")
5586
fmt.Fprintf(os.Stderr, " Read from .api_key file (in -o dir, then cwd), or SAIA_API_KEY env var.\n\n")
87+
fmt.Fprintf(os.Stderr, "All flags can also be set via environment variables (TRACKER_*).\n")
88+
fmt.Fprintf(os.Stderr, "Flags take precedence over environment variables.\n\n")
5689
fmt.Fprintf(os.Stderr, "Flags:\n")
5790
flag.PrintDefaults()
5891
}

0 commit comments

Comments
 (0)