Skip to content

Commit 3e9f95f

Browse files
authored
feat: Zig builds for CGO enabling sqlite-vec integration (#39)
* feat: Zig builds for CGO * feat: Sqlite-vec embedded Signed-off-by: John McBride <john@papercompute.com>
1 parent 91ec7e4 commit 3e9f95f

File tree

17 files changed

+1030
-133
lines changed

17 files changed

+1030
-133
lines changed

.dagger/build.go

Lines changed: 133 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"fmt"
5+
"runtime"
56
"strings"
67
"time"
78

@@ -10,7 +11,37 @@ import (
1011
"dagger/tapes/internal/dagger"
1112
)
1213

13-
// Build and return directory of go binaries
14+
const (
15+
// zigVersion is the Zig release interpolated into the toolchain download
// URL and the extracted directory name used by buildLinux.
zigVersion string = "0.15.2"
16+
17+
// osxcross image provides the macOS SDK and cross-compilation toolchain
18+
// for building CGO-enabled Go binaries targeting darwin from Linux containers.
// NOTE(review): "latest-ubuntu" is a floating tag, so the toolchain pulled
// here can change between builds — consider pinning a versioned tag or digest.
19+
osxcrossImage string = "crazymax/osxcross:latest-ubuntu"
20+
)
21+
22+
// buildTarget describes one cross-compilation target. Each field is exported
// into the build container as the environment variable named in its comment
// before `go build` runs.
type buildTarget struct {
23+
// goos is set as GOOS and is also the first segment of the output path.
goos string
24+
// goarch is set as GOARCH and is also the second segment of the output path.
goarch string
25+
// cc is set as CC (the C compiler command cgo invokes).
cc string
26+
// cxx is set as CXX (the C++ compiler command cgo invokes).
cxx string
27+
// cgoFlags is set as CGO_CFLAGS.
cgoFlags string
28+
// cgoLdFlags is set as CGO_LDFLAGS.
cgoLdFlags string
29+
}
30+
31+
// zigArch returns the architecture name to use for Zig, derived from the
// architecture this program was compiled for (runtime.GOARCH): "arm64" maps
// to "aarch64", "amd64" maps to "x86_64", and any other value is returned
// unchanged.
func zigArch() string {
32+
switch runtime.GOARCH {
33+
case "arm64":
34+
return "aarch64"
35+
case "amd64":
36+
return "x86_64"
37+
default:
// No translation known; pass the Go name through as-is.
// NOTE(review): the result may not be a valid Zig release name — confirm
// which host architectures are expected here.
38+
return runtime.GOARCH
39+
}
40+
}
41+
42+
// Build and return directory of go binaries for all platforms.
43+
// Linux targets are cross-compiled using Zig as the C toolchain.
44+
// Darwin targets are cross-compiled using osxcross (macOS SDK + clang).
1445
func (t *Tapes) Build(
1546
ctx context.Context,
1647

@@ -19,41 +50,117 @@ func (t *Tapes) Build(
1950
// +default="-s -w"
2051
ldflags string,
2152
) *dagger.Directory {
22-
// define build matrix
23-
gooses := []string{"linux", "darwin"}
24-
goarches := []string{"amd64", "arm64"}
25-
26-
// create empty directory to put build artifacts
2753
outputs := dag.Directory()
54+
outputs = t.buildLinux(outputs, ldflags)
55+
outputs = t.buildDarwin(outputs, ldflags)
56+
return outputs
57+
}
58+
59+
// buildLinux compiles Go binaries for linux/amd64 and linux/arm64
60+
// using Zig as the cross-compilation C toolchain.
61+
func (t *Tapes) buildLinux(outputs *dagger.Directory, ldflags string) *dagger.Directory {
62+
// -I/opt/sqlite matches the directory the sqlite headers are copied into
// when the base container is assembled below.
// NOTE(review): -fno-sanitize=all presumably turns off sanitizer
// instrumentation that zig cc would otherwise add — confirm.
cgoFlags := "-I/opt/sqlite -fno-sanitize=all"
63+
cgoLdFlags := "-fno-sanitize=all"
64+
65+
// Positional fields follow buildTarget's declaration order:
// goos, goarch, cc, cxx, cgoFlags, cgoLdFlags.
targets := []buildTarget{
66+
{"linux", "amd64", "zig cc -target x86_64-linux-gnu", "zig c++ -target x86_64-linux-gnu", cgoFlags, cgoLdFlags},
67+
{"linux", "arm64", "zig cc -target aarch64-linux-gnu", "zig c++ -target aarch64-linux-gnu", cgoFlags, cgoLdFlags},
68+
}
69+
70+
// Build zig download URL based on host architecture
71+
// Note: this local shadows the package-level zigArch function for the
// remainder of buildLinux.
zigArch := zigArch()
72+
zigDownloadURL := fmt.Sprintf("https://ziglang.org/download/%s/zig-%s-linux-%s.tar.xz", zigVersion, zigArch, zigVersion)
73+
// zigDir is the directory under /usr/local that is prepended to PATH after
// the tarball is extracted there; presumably it matches the archive's
// top-level directory name — confirm against the Zig release layout.
zigDir := fmt.Sprintf("zig-%s-linux-%s", zigArch, zigVersion)
74+
75+
// Base container shared by every target: sqlite headers staged in
// /opt/sqlite, Zig on PATH, Go module and build caches mounted, and the
// project source placed at /src as the working directory.
golang := dag.Container().
76+
From("golang:1.25-bookworm").
77+
WithExec([]string{"apt-get", "update"}).
78+
WithExec([]string{"apt-get", "install", "-y", "libsqlite3-dev", "xz-utils"}).
79+
WithExec([]string{"mkdir", "-p", "/opt/sqlite"}).
80+
WithExec([]string{"cp", "/usr/include/sqlite3.h", "/opt/sqlite/"}).
81+
WithExec([]string{"cp", "/usr/include/sqlite3ext.h", "/opt/sqlite/"}).
82+
WithExec([]string{"sh", "-c", fmt.Sprintf("curl -L %s | tar -xJ -C /usr/local", zigDownloadURL)}).
83+
WithEnvVariable("PATH", fmt.Sprintf("/usr/local/%s:$PATH", zigDir), dagger.ContainerWithEnvVariableOpts{Expand: true}).
84+
WithMountedCache("/go/pkg/mod", dag.CacheVolume("go-mod")).
85+
WithMountedCache("/root/.cache/go-build", dag.CacheVolume("go-build")).
86+
WithDirectory("/src", t.Source).
87+
WithWorkdir("/src")
88+
89+
// Each iteration starts from the shared golang container and layers the
// target's environment on top, then builds the three CLI binaries.
for _, target := range targets {
90+
// Output directory for this target, e.g. "linux/amd64/".
path := fmt.Sprintf("%s/%s/", target.goos, target.goarch)
91+
92+
build := golang.
93+
WithEnvVariable("CGO_ENABLED", "1").
94+
WithEnvVariable("GOEXPERIMENT", "jsonv2").
95+
WithEnvVariable("GOOS", target.goos).
96+
WithEnvVariable("GOARCH", target.goarch).
97+
WithEnvVariable("CC", target.cc).
98+
WithEnvVariable("CXX", target.cxx).
99+
WithEnvVariable("CGO_CFLAGS", target.cgoFlags).
100+
WithEnvVariable("CGO_LDFLAGS", target.cgoLdFlags).
101+
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapes"}).
102+
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapesprox"}).
103+
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapesapi"})
104+
105+
// Add this target's build directory to the aggregate outputs under the
// same goos/goarch path.
outputs = outputs.WithDirectory(path, build.Directory(path))
106+
}
107+
108+
return outputs
109+
}
110+
111+
// buildDarwin compiles Go binaries for darwin/amd64 and darwin/arm64
112+
// using the osxcross toolchain which provides the macOS SDK and clang
113+
// cross-compilers inside a Linux container.
114+
func (t *Tapes) buildDarwin(outputs *dagger.Directory, ldflags string) *dagger.Directory {
115+
cgoFlags := "-I/opt/sqlite"
116+
cgoLdFlags := ""
117+
118+
targets := []buildTarget{
119+
{"darwin", "amd64", "o64-clang", "o64-clang++", cgoFlags, cgoLdFlags},
120+
{"darwin", "arm64", "oa64-clang", "oa64-clang++", cgoFlags, cgoLdFlags},
121+
}
122+
123+
// Pull the osxcross toolchain (macOS SDK + clang cross-compilers)
124+
osxcross := dag.Container().
125+
From(osxcrossImage).
126+
Directory("/osxcross")
28127

128+
// Use Debian Trixie as the base for darwin builds because the osxcross
129+
// toolchain binaries require GLIBC 2.38+ (Bookworm only has 2.36).
29130
golang := dag.Container().
30-
From("golang:1.25-alpine").
31-
WithEnvVariable("CGO_ENABLED", "0").
32-
WithEnvVariable("GOEXPERIMENT", "jsonv2").
131+
From("golang:1.25-trixie").
132+
WithExec([]string{"apt-get", "update"}).
133+
WithExec([]string{"apt-get", "install", "-y", "clang", "lld", "libsqlite3-dev"}).
134+
WithExec([]string{"mkdir", "-p", "/opt/sqlite"}).
135+
WithExec([]string{"cp", "/usr/include/sqlite3.h", "/opt/sqlite/"}).
136+
WithExec([]string{"cp", "/usr/include/sqlite3ext.h", "/opt/sqlite/"}).
137+
WithDirectory("/osxcross", osxcross).
138+
WithEnvVariable("PATH", "/osxcross/bin:$PATH", dagger.ContainerWithEnvVariableOpts{Expand: true}).
139+
WithEnvVariable("LD_LIBRARY_PATH", "/osxcross/lib:$LD_LIBRARY_PATH", dagger.ContainerWithEnvVariableOpts{Expand: true}).
33140
WithMountedCache("/go/pkg/mod", dag.CacheVolume("go-mod")).
34141
WithMountedCache("/root/.cache/go-build", dag.CacheVolume("go-build")).
35142
WithDirectory("/src", t.Source).
36143
WithWorkdir("/src")
37144

38-
for _, goos := range gooses {
39-
for _, goarch := range goarches {
40-
// create directory for each OS and architecture
41-
path := fmt.Sprintf("%s/%s/", goos, goarch)
42-
43-
// build artifact
44-
build := golang.
45-
WithEnvVariable("GOOS", goos).
46-
WithEnvVariable("GOARCH", goarch).
47-
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapes"}).
48-
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapesprox"}).
49-
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapesapi"})
50-
51-
// add build to outputs
52-
outputs = outputs.WithDirectory(path, build.Directory(path))
53-
}
145+
for _, target := range targets {
146+
path := fmt.Sprintf("%s/%s/", target.goos, target.goarch)
147+
148+
build := golang.
149+
WithEnvVariable("CGO_ENABLED", "1").
150+
WithEnvVariable("GOEXPERIMENT", "jsonv2").
151+
WithEnvVariable("GOOS", target.goos).
152+
WithEnvVariable("GOARCH", target.goarch).
153+
WithEnvVariable("CC", target.cc).
154+
WithEnvVariable("CXX", target.cxx).
155+
WithEnvVariable("CGO_CFLAGS", target.cgoFlags).
156+
WithEnvVariable("CGO_LDFLAGS", target.cgoLdFlags).
157+
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapes"}).
158+
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapesprox"}).
159+
WithExec([]string{"go", "build", "-ldflags", ldflags, "-o", path, "./cli/tapesapi"})
160+
161+
outputs = outputs.WithDirectory(path, build.Directory(path))
54162
}
55163

56-
// return build directory
57164
return outputs
58165
}
59166

.dagger/main.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,14 @@ func (t *Tapes) Test(ctx context.Context) (string, error) {
3939
}
4040

4141
// testContainer returns a container configured for running tests
42+
// with a local gcc toolchain for CGO and sqlite dependencies.
4243
func (t *Tapes) testContainer() *dagger.Container {
4344
return dag.Container().
44-
From("golang:1.25-alpine").
45-
WithEnvVariable("CGO_ENABLED", "0").
45+
From("golang:1.25-bookworm").
46+
WithExec([]string{"apt-get", "update"}).
47+
WithExec([]string{"apt-get", "install", "-y", "gcc"}).
48+
WithExec([]string{"apt-get", "install", "-y", "libsqlite3-dev"}).
49+
WithEnvVariable("CGO_ENABLED", "1").
4650
WithEnvVariable("GOEXPERIMENT", "jsonv2").
4751
WithMountedCache("/go/pkg/mod", dag.CacheVolume("go-mod")).
4852
WithMountedCache("/root/.cache/go-build", dag.CacheVolume("go-build")).

.github/workflows/ci.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ jobs:
3636
build:
3737
name: Build
3838
runs-on: ubuntu-latest
39-
needs: test
4039

4140
steps:
4241
- name: Checkout repository

cmd/tapes/serve/proxy/proxy.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ func NewProxyCmd() *cobra.Command {
6767
cmd.Flags().StringVarP(&cmder.upstream, "upstream", "u", "http://localhost:11434", "Upstream LLM provider URL")
6868
cmd.Flags().StringVarP(&cmder.providerType, "provider", "p", "ollama", "LLM provider type (anthropic, openai, ollama, besteffort)")
6969
cmd.Flags().StringVarP(&cmder.sqlitePath, "sqlite", "s", "", "Path to SQLite database (default: in-memory)")
70-
cmd.Flags().StringVar(&cmder.vectorStoreProvider, "vector-store-provider", "", "Vector store provider type (e.g., chroma)")
70+
cmd.Flags().StringVar(&cmder.vectorStoreProvider, "vector-store-provider", "sqlite", "Vector store provider type (e.g., chroma, sqlite)")
7171
cmd.Flags().StringVar(&cmder.vectorStoreTarget, "vector-store-target", "", "Vector store URL (e.g., http://localhost:8000)")
7272
cmd.Flags().StringVar(&cmder.embeddingProvider, "embedding-provider", "", "Embedding provider type (e.g., ollama)")
7373
cmd.Flags().StringVar(&cmder.embeddingTarget, "embedding-target", "", "Embedding provider URL")
@@ -105,8 +105,11 @@ func (c *proxyCommander) run() error {
105105

106106
config.VectorDriver, err = vectorutils.NewVectorDriver(&vectorutils.NewVectorDriverOpts{
107107
ProviderType: c.vectorStoreProvider,
108-
TargetURL: c.vectorStoreTarget,
108+
Target: c.vectorStoreTarget,
109109
Logger: c.logger,
110+
111+
// TODO - need to make this actually configurable
112+
Dimensions: 1024,
110113
})
111114
if err != nil {
112115
return fmt.Errorf("creating vector driver: %w", err)

cmd/tapes/serve/serve.go

Lines changed: 46 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"os"
77
"os/signal"
8+
"path/filepath"
89
"syscall"
910

1011
"github.com/spf13/cobra"
@@ -13,6 +14,7 @@ import (
1314
"github.com/papercomputeco/tapes/api"
1415
apicmder "github.com/papercomputeco/tapes/cmd/tapes/serve/api"
1516
proxycmder "github.com/papercomputeco/tapes/cmd/tapes/serve/proxy"
17+
"github.com/papercomputeco/tapes/pkg/dotdir"
1618
embeddingutils "github.com/papercomputeco/tapes/pkg/embeddings/utils"
1719
"github.com/papercomputeco/tapes/pkg/logger"
1820
"github.com/papercomputeco/tapes/pkg/merkle"
@@ -35,9 +37,10 @@ type ServeCommander struct {
3537
vectorStoreProvider string
3638
vectorStoreTarget string
3739

38-
embeddingProvider string
39-
embeddingTarget string
40-
embeddingModel string
40+
embeddingProvider string
41+
embeddingTarget string
42+
embeddingModel string
43+
embeddingDimensions uint
4144

4245
logger *zap.Logger
4346
}
@@ -71,16 +74,24 @@ func NewServeCmd() *cobra.Command {
7174
},
7275
}
7376

77+
dotdirManger := dotdir.NewManager()
78+
defaultTargetDir, err := dotdirManger.Target("")
79+
if err != nil {
80+
panic(err)
81+
}
82+
defaultTargetSqliteFile := filepath.Join(defaultTargetDir, "tapes.sqlite")
83+
7484
cmd.Flags().StringVarP(&cmder.proxyListen, "proxy-listen", "p", ":8080", "Address for proxy to listen on")
7585
cmd.Flags().StringVarP(&cmder.apiListen, "api-listen", "a", ":8081", "Address for API server to listen on")
7686
cmd.Flags().StringVarP(&cmder.upstream, "upstream", "u", "http://localhost:11434", "Upstream LLM provider URL")
7787
cmd.Flags().StringVar(&cmder.providerType, "provider", "ollama", "LLM provider type (anthropic, openai, ollama, besteffort)")
78-
cmd.Flags().StringVarP(&cmder.sqlitePath, "sqlite", "s", "", "Path to SQLite database (default: in-memory)")
79-
cmd.Flags().StringVar(&cmder.vectorStoreProvider, "vector-store-provider", "", "Vector store provider type (e.g., chroma)")
80-
cmd.Flags().StringVar(&cmder.vectorStoreTarget, "vector-store-target", "", "Vector store URL (e.g., http://localhost:8000)")
81-
cmd.Flags().StringVar(&cmder.embeddingProvider, "embedding-provider", "", "Embedding provider type (e.g., ollama)")
82-
cmd.Flags().StringVar(&cmder.embeddingTarget, "embedding-target", "", "Embedding provider URL")
83-
cmd.Flags().StringVar(&cmder.embeddingModel, "embedding-model", "", "Embedding model name (e.g., nomic-embed-text)")
88+
cmd.Flags().StringVarP(&cmder.sqlitePath, "sqlite", "s", defaultTargetSqliteFile, "Path to SQLite database (e.g., ./tapes.sqlite, in-memory)")
89+
cmd.Flags().StringVar(&cmder.vectorStoreProvider, "vector-store-provider", "sqlite", "Vector store provider type (e.g., chroma, sqlite)")
90+
cmd.Flags().StringVar(&cmder.vectorStoreTarget, "vector-store-target", defaultTargetSqliteFile, "Vector store target fielpath for sqlite or URL for vector store service (e.g., http://localhost:8000, ./db.sqlite)")
91+
cmd.Flags().StringVar(&cmder.embeddingProvider, "embedding-provider", "ollama", "Embedding provider type (e.g., ollama)")
92+
cmd.Flags().StringVar(&cmder.embeddingTarget, "embedding-target", "http://localhost:11434", "Embedding provider URL")
93+
cmd.Flags().StringVar(&cmder.embeddingModel, "embedding-model", "embeddinggemma", "Embedding model name (e.g., embeddinggemma, nomic-embed-text)")
94+
cmd.Flags().UintVar(&cmder.embeddingDimensions, "embedding-dimensions", 768, "Embedding dimensionality.")
8495

8596
cmd.AddCommand(apicmder.NewAPICmd())
8697
cmd.AddCommand(proxycmder.NewProxyCmd())
@@ -111,35 +122,34 @@ func (c *ServeCommander) run() error {
111122
ProviderType: c.providerType,
112123
}
113124

114-
if c.vectorStoreTarget != "" {
115-
proxyConfig.Embedder, err = embeddingutils.NewEmbedder(&embeddingutils.NewEmbedderOpts{
116-
ProviderType: c.embeddingProvider,
117-
TargetURL: c.embeddingTarget,
118-
Model: c.embeddingModel,
119-
})
120-
if err != nil {
121-
return fmt.Errorf("creating embedder: %w", err)
122-
}
123-
defer proxyConfig.Embedder.Close()
125+
proxyConfig.VectorDriver, err = vectorutils.NewVectorDriver(&vectorutils.NewVectorDriverOpts{
126+
ProviderType: c.vectorStoreProvider,
127+
Target: c.vectorStoreTarget,
128+
Dimensions: c.embeddingDimensions,
129+
Logger: c.logger,
130+
})
131+
if err != nil {
132+
return fmt.Errorf("creating vector driver: %w", err)
133+
}
134+
defer proxyConfig.VectorDriver.Close()
124135

125-
proxyConfig.VectorDriver, err = vectorutils.NewVectorDriver(&vectorutils.NewVectorDriverOpts{
126-
ProviderType: c.vectorStoreProvider,
127-
TargetURL: c.vectorStoreTarget,
128-
Logger: c.logger,
129-
})
130-
if err != nil {
131-
return fmt.Errorf("creating vector driver: %w", err)
132-
}
133-
defer proxyConfig.VectorDriver.Close()
134-
135-
c.logger.Info("vector storage enabled",
136-
zap.String("vector_store_provider", c.vectorStoreProvider),
137-
zap.String("vector_store_target", c.vectorStoreTarget),
138-
zap.String("embedding_provider", c.embeddingProvider),
139-
zap.String("embedding_target", c.embeddingTarget),
140-
zap.String("embedding_model", c.embeddingModel),
141-
)
136+
proxyConfig.Embedder, err = embeddingutils.NewEmbedder(&embeddingutils.NewEmbedderOpts{
137+
ProviderType: c.embeddingProvider,
138+
TargetURL: c.embeddingTarget,
139+
Model: c.embeddingModel,
140+
})
141+
if err != nil {
142+
return fmt.Errorf("creating embedder: %w", err)
142143
}
144+
defer proxyConfig.Embedder.Close()
145+
146+
c.logger.Info("vector storage enabled",
147+
zap.String("vector_store_provider", c.vectorStoreProvider),
148+
zap.String("vector_store_target", c.vectorStoreTarget),
149+
zap.String("embedding_provider", c.embeddingProvider),
150+
zap.String("embedding_target", c.embeddingTarget),
151+
zap.String("embedding_model", c.embeddingModel),
152+
)
143153

144154
// Create proxy
145155
p, err := proxy.New(proxyConfig, driver, c.logger)

dockerfiles/tapes.Dockerfile

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
# -----------------------------------------------------------------------------
44
# Build stage
55
# -----------------------------------------------------------------------------
6-
FROM golang:1.25-alpine AS builder
6+
FROM golang:1.25-bookworm AS builder
7+
8+
# CGO and sqlite dependencies
9+
RUN apt-get update && apt-get install -y --no-install-recommends \
10+
gcc \
11+
libsqlite3-dev
712

813
WORKDIR /src
914

@@ -15,8 +20,8 @@ RUN go mod download
1520
COPY . .
1621

1722
ARG LDFLAGS="-s -w"
18-
RUN CGO_ENABLED=0 GOEXPERIMENT="jsonv2" go build \
19-
-ldflags="${LDFLAGS}" \
23+
RUN CGO_ENABLED=1 GOEXPERIMENT="jsonv2" go build \
24+
-ldflags="${LDFLAGS} -linkmode external -extldflags '-static'" \
2025
-o /bin/tapes \
2126
./cli/tapes
2227

0 commit comments

Comments
 (0)