diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml index 464ffc36c4f6..1bb850de1233 100644 --- a/.github/backend-matrix.yml +++ b/.github/backend-matrix.yml @@ -703,6 +703,19 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "8" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-locate-anything-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "8" @@ -1543,6 +1556,19 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-locate-anything-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -1569,6 +1595,19 @@ include: backend: "rfdetr-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-cuda-13-arm64-locate-anything-cpp' + base-image: "ubuntu:24.04" + ubuntu-version: '2404' + runs-on: 'ubuntu-24.04-arm' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -2806,6 +2845,74 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' + # 
locate-anything-cpp + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-cpu-locate-anything-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f32' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f32-locate-anything-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'sycl_f16' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-intel-sycl-f16-locate-anything-cpp' + runs-on: 'ubuntu-latest' + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + platform-tag: 'amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-vulkan-locate-anything-cpp' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'vulkan' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/arm64' + platform-tag: 'arm64' + tag-latest: 'auto' + tag-suffix: '-gpu-vulkan-locate-anything-cpp' + runs-on: 'ubuntu-24.04-arm' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' - 
build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -2899,6 +3006,19 @@ include: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2204' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/arm64' + skip-drivers: 'false' + tag-latest: 'auto' + tag-suffix: '-nvidia-l4t-arm64-locate-anything-cpp' + base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + runs-on: 'ubuntu-24.04-arm' + backend: "locate-anything-cpp" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2204' # whisper - build-type: '' cuda-major-version: "" diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 5f1ac0c21525..f02414ef99a4 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -62,6 +62,10 @@ jobs: variable: "RFDETR_VERSION" branch: "main" file: "backend/go/rfdetr-cpp/Makefile" + - repository: "mudler/locate-anything.cpp" + variable: "LOCATEANYTHING_VERSION" + branch: "master" + file: "backend/go/locate-anything-cpp/Makefile" - repository: "predict-woo/qwen3-tts.cpp" variable: "QWEN3TTS_CPP_VERSION" branch: "main" diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index 12c186ca2870..6c7b2b5e8d25 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -38,6 +38,7 @@ jobs: acestep-cpp: ${{ steps.detect.outputs.acestep-cpp }} qwen3-tts-cpp: ${{ steps.detect.outputs.qwen3-tts-cpp }} rfdetr-cpp: ${{ steps.detect.outputs.rfdetr-cpp }} + locate-anything-cpp: ${{ steps.detect.outputs.locate-anything-cpp }} vibevoice-cpp: ${{ steps.detect.outputs.vibevoice-cpp }} localvqe: ${{ steps.detect.outputs.localvqe }} voxtral: ${{ steps.detect.outputs.voxtral }} @@ -901,6 +902,45 @@ jobs: - name: Test rfdetr-cpp run: | make --jobs=5 --output-sync=target -C backend/go/rfdetr-cpp test + # Per-backend e2e for locate-anything-cpp: builds the .so + Go binary and + # runs `make -C 
backend/go/locate-anything-cpp test`. test.sh fetches the + # locate-anything-q8_0 GGUF (~6.3 GB, NVIDIA LocateAnything-3B) from the + # published mudler/locate-anything.cpp-gguf HF repo + a COCO image, then the + # Go wire test loads the model and runs an open-vocabulary Detect, asserting + # at least one labeled box. Heavier than the other Go backends (it is a 3B), + # so it is gated to changes under backend/go/locate-anything-cpp/. + tests-locate-anything-cpp: + needs: detect-changes + if: needs.detect-changes.outputs.locate-anything-cpp == 'true' || needs.detect-changes.outputs.run-all == 'true' + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: true + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake curl libopenblas-dev + - name: Setup Go + uses: actions/setup-go@v5 + - name: Display Go version + run: go version + - name: Proto Dependencies + run: | + # Install protoc + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go + - name: Build locate-anything-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/locate-anything-cpp + - name: Test locate-anything-cpp + run: | + make --jobs=5 --output-sync=target -C backend/go/locate-anything-cpp test # Per-backend smoke for vibevoice-cpp: builds the .so + Go binary and # runs `make -C backend/go/vibevoice-cpp test`. 
test.sh auto-downloads # the published mudler/vibevoice.cpp-models bundle (TTS Q8_0 + ASR Q4_K diff --git a/Makefile b/Makefile index cafcdd44a692..8195c06fc575 100644 --- a/Makefile +++ b/Makefile @@ -566,6 +566,7 @@ prepare-test-extra: protogen-python $(MAKE) -C backend/python/speaker-recognition $(MAKE) -C backend/rust/kokoros kokoros-grpc $(MAKE) -C backend/go/rfdetr-cpp + $(MAKE) -C backend/go/locate-anything-cpp test-extra: prepare-test-extra $(MAKE) -C backend/python/transformers test @@ -593,6 +594,7 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/speaker-recognition test $(MAKE) -C backend/rust/kokoros test $(MAKE) -C backend/go/rfdetr-cpp test + $(MAKE) -C backend/go/locate-anything-cpp test ## ## End-to-end gRPC tests that exercise a built backend container image. diff --git a/backend/go/locate-anything-cpp/.gitignore b/backend/go/locate-anything-cpp/.gitignore new file mode 100644 index 000000000000..b1d824c3431d --- /dev/null +++ b/backend/go/locate-anything-cpp/.gitignore @@ -0,0 +1,7 @@ +sources/ +build*/ +package/ +liblocateanythingcpp*.so +locate-anything-cpp +test-models/ +test-data/ diff --git a/backend/go/locate-anything-cpp/CMakeLists.txt b/backend/go/locate-anything-cpp/CMakeLists.txt new file mode 100644 index 000000000000..22d6232b8469 --- /dev/null +++ b/backend/go/locate-anything-cpp/CMakeLists.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 3.18) +project(liblocateanythingcpp LANGUAGES C CXX) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Static-link ggml + locate_anything so the resulting .so has no runtime +# dependency on extra ggml/locate_anything shared libraries — only on +# libc/libstdc++/libgomp, which the LocalAI package step bundles into the +# docker image. +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE) + +# locate-anything.cpp build switches: skip CLI/tests, keep static lib. 
+set(LA_BUILD_CLI OFF CACHE BOOL "Disable locate-anything CLI" FORCE) +set(LA_BUILD_TESTS OFF CACHE BOOL "Disable locate-anything tests" FORCE) +set(LA_SHARED OFF CACHE BOOL "Build locate_anything as static lib" FORCE) + +# Unlike rt-detr.cpp, locate-anything.cpp ships no in-tree ggml patches, so +# there is no apply_ggml_patches.sh hook to shim here. +add_subdirectory(./sources/locate-anything.cpp) + +# locate-anything.cpp's top-level CMakeLists points its own target's include +# dirs at ${CMAKE_SOURCE_DIR}/{include,src,third_party,...}. CMAKE_SOURCE_DIR +# is the *top-level* source dir of the whole CMake tree, so when we pull it in +# via add_subdirectory it resolves to OUR directory, not theirs, and the +# locate_anything target fails to find its own headers (la_capi.h, stb_image.h, +# la_gguf_keys.h). Re-add the correct, subdir-relative include paths to the +# already-defined target so it compiles regardless of where it's nested. +set(LA_SRC ${CMAKE_CURRENT_SOURCE_DIR}/sources/locate-anything.cpp) +target_include_directories(locate_anything PRIVATE + ${LA_SRC}/include + ${LA_SRC}/src + ${LA_SRC}/third_party + ${LA_SRC}/third_party/stb) + +# locate-anything.cpp's C-API symbols already live inside liblocate_anything +# (src/la_capi.cpp is compiled into the lib). We re-export them via a MODULE +# library that links locate_anything so the symbols are visible at dlopen time. 
+add_library(locateanythingcpp MODULE + sources/locate-anything.cpp/src/la_capi.cpp) + +target_include_directories(locateanythingcpp PRIVATE + sources/locate-anything.cpp/include + sources/locate-anything.cpp/src + sources/locate-anything.cpp/third_party + sources/locate-anything.cpp/third_party/stb +) + +target_link_libraries(locateanythingcpp PRIVATE locate_anything ggml) + +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) + target_link_libraries(locateanythingcpp PRIVATE stdc++fs) +endif() + +set_property(TARGET locateanythingcpp PROPERTY CXX_STANDARD 17) +set_target_properties(locateanythingcpp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) diff --git a/backend/go/locate-anything-cpp/Makefile b/backend/go/locate-anything-cpp/Makefile new file mode 100644 index 000000000000..621c92fa59c6 --- /dev/null +++ b/backend/go/locate-anything-cpp/Makefile @@ -0,0 +1,134 @@ +CMAKE_ARGS?= +BUILD_TYPE?= +NATIVE?=false + +GOCMD?=go +GO_TAGS?= +JOBS?=$(shell nproc --ignore=1) + +# locate-anything.cpp. Pin to a specific commit for a stable build; leaving +# this on `master` always picks up the latest C-API surface (incl. the +# per-detection accessor functions used by golocateanythingcpp.go). +LOCATEANYTHING_REPO?=https://github.com/mudler/locate-anything.cpp.git +LOCATEANYTHING_VERSION?=60e450945476d5e97e0754a8c0e71a9ea81690e0 + +ifeq ($(NATIVE),false) + CMAKE_ARGS+=-DGGML_NATIVE=OFF +endif + +# Forward LocalAI's BUILD_TYPE to the matching ggml backend switch. 
+ifeq ($(BUILD_TYPE),cublas) + CMAKE_ARGS+=-DGGML_CUDA=ON -DLA_GGML_CUDA=ON +else ifeq ($(BUILD_TYPE),openblas) + CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS +else ifeq ($(BUILD_TYPE),clblas) + CMAKE_ARGS+=-DGGML_CLBLAST=ON +else ifeq ($(BUILD_TYPE),hipblas) + ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + export CXX=$(ROCM_HOME)/llvm/bin/clang++ + export CC=$(ROCM_HOME)/llvm/bin/clang + AMDGPU_TARGETS?=gfx908,gfx90a,gfx942,gfx950,gfx1030,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) +else ifeq ($(BUILD_TYPE),vulkan) + CMAKE_ARGS+=-DGGML_VULKAN=ON -DLA_GGML_VULKAN=ON +else ifeq ($(OS),Darwin) + ifneq ($(BUILD_TYPE),metal) + CMAKE_ARGS+=-DGGML_METAL=OFF + else + CMAKE_ARGS+=-DGGML_METAL=ON + CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON + CMAKE_ARGS+=-DLA_GGML_METAL=ON + endif +endif + +ifeq ($(BUILD_TYPE),sycl_f16) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DGGML_SYCL_F16=ON +endif + +ifeq ($(BUILD_TYPE),sycl_f32) + CMAKE_ARGS+=-DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx +endif + +sources/locate-anything.cpp: + mkdir -p sources && \ + git clone --recursive $(LOCATEANYTHING_REPO) sources/locate-anything.cpp && \ + cd sources/locate-anything.cpp && \ + git checkout $(LOCATEANYTHING_VERSION) && \ + git submodule update --init --recursive --depth 1 --single-branch + +# Detect OS +UNAME_S := $(shell uname -s) + +# Only build CPU variants on Linux +ifeq ($(UNAME_S),Linux) + VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so +else + # On non-Linux (e.g., Darwin), build only fallback variant + VARIANT_TARGETS = liblocateanythingcpp-fallback.so +endif + +locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS) + CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o locate-anything-cpp ./ + +package: locate-anything-cpp + bash 
package.sh + +build: package + +clean: purge + rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources + +purge: + rm -rf build* + +# Build all variants (Linux only) +ifeq ($(UNAME_S),Linux) +liblocateanythingcpp-avx.so: sources/locate-anything.cpp + rm -rfv build-$@ + $(info ${GREEN}I locate-anything-cpp build info:avx${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom + rm -rfv build-$@ + +liblocateanythingcpp-avx2.so: sources/locate-anything.cpp + rm -rfv build-$@ + $(info ${GREEN}I locate-anything-cpp build info:avx2${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) liblocateanythingcpp-custom + rm -rfv build-$@ + +liblocateanythingcpp-avx512.so: sources/locate-anything.cpp + rm -rfv build-$@ + $(info ${GREEN}I locate-anything-cpp build info:avx512${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) liblocateanythingcpp-custom + rm -rfv build-$@ +endif + +# Build fallback variant (all platforms) +liblocateanythingcpp-fallback.so: sources/locate-anything.cpp + rm -rfv build-$@ + $(info ${GREEN}I locate-anything-cpp build info:fallback${RESET}) + SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom + rm -rfv build-$@ + +liblocateanythingcpp-custom: CMakeLists.txt + mkdir -p build-$(SO_TARGET) && \ + cd build-$(SO_TARGET) && \ + cmake .. $(CMAKE_ARGS) && \ + cmake --build . --config Release -j$(JOBS) && \ + cd .. && \ + mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) + +all: locate-anything-cpp package + +# `test` is invoked by the top-level Makefile's `test-extra` target. 
It builds +# the backend binary + the fallback shared library (needed for dlopen at +# runtime), then runs test.sh which downloads the q8_0 GGUF + COCO image and +# exercises the gRPC Load/Detect wire path via the Go smoke test in +# main_test.go. +test: locate-anything-cpp liblocateanythingcpp-fallback.so + bash test.sh diff --git a/backend/go/locate-anything-cpp/golocateanythingcpp.go b/backend/go/locate-anything-cpp/golocateanythingcpp.go new file mode 100644 index 000000000000..25c7b80c52bd --- /dev/null +++ b/backend/go/locate-anything-cpp/golocateanythingcpp.go @@ -0,0 +1,174 @@ +package main + +// golocateanythingcpp.go - gRPC handlers (Load, Detect) for the +// locate-anything-cpp backend. +// +// Embeds base.SingleThread to default unimplemented RPCs to "not supported" +// while we only implement open-vocabulary object detection (Detect). + +import ( + "encoding/base64" + "fmt" + "os" + "path/filepath" + "unsafe" + + "github.com/mudler/LocalAI/pkg/grpc/base" + pb "github.com/mudler/LocalAI/pkg/grpc/proto" +) + +// la_ctx* is an opaque handle. la_capi_load returns it directly (0 == failure), +// unlike rfdetr's out-parameter convention. 
+var ( + // la_capi_load(const char* gguf_path, int n_threads) -> la_ctx* (0 = fail) + CapiLoad func(gguf string, nThreads int32) uintptr + // la_capi_free(la_ctx* ctx) + CapiFree func(handle uintptr) + // la_capi_locate_path(ctx, image_path, prompt, mode) -> char* json (0 = err) + CapiLocatePath func(handle uintptr, imagePath string, prompt string, mode int32) uintptr + // la_capi_locate_buffer(ctx, bytes, len, prompt, mode) -> char* json (0 = err) + CapiLocateBuffer func(handle uintptr, bytes uintptr, length uintptr, prompt string, mode int32) uintptr + // la_capi_get_n_detections(ctx) -> int + CapiGetNDetections func(handle uintptr) int32 + // la_capi_get_detection_box(ctx, i, out_xyxy[4]) -> int (0 on success) + CapiGetDetectionBox func(handle uintptr, i int32, outXYXY uintptr) int32 + // la_capi_get_detection_label(ctx, i, buf, buf_size) -> int (required size incl NUL; two-call sizing) + CapiGetDetectionLabel func(handle uintptr, i int32, buf uintptr, bufSize int32) int32 + // la_capi_free_string(char* s) + CapiFreeString func(s uintptr) + // la_capi_last_error(ctx) -> const char* (owned by ctx, "" if none / null ctx). + // purego marshals the returned C string into a Go string (a copy), so we + // never free it and avoid raw pointer arithmetic. + CapiLastError func(handle uintptr) string +) + +type LocateAnythingCpp struct { + base.SingleThread + handle uintptr +} + +// Load loads the GGUF model at opts.ModelFile (joined with opts.ModelPath if +// relative) and stores the la_ctx handle for later Detect calls. 
+func (r *LocateAnythingCpp) Load(opts *pb.ModelOptions) error { + modelFile := opts.ModelFile + if modelFile == "" { + modelFile = opts.Model + } + if modelFile == "" { + return fmt.Errorf("locate-anything-cpp: ModelFile is empty") + } + + var modelPath string + if filepath.IsAbs(modelFile) { + modelPath = modelFile + } else { + modelPath = filepath.Join(opts.ModelPath, modelFile) + } + + if _, err := os.Stat(modelPath); err != nil { + return fmt.Errorf("locate-anything-cpp: model file not found: %s: %w", modelPath, err) + } + + threads := opts.Threads + if threads <= 0 { + threads = 4 + } + + // Release previous model if any (re-Load). + if r.handle != 0 { + CapiFree(r.handle) + r.handle = 0 + } + + h := CapiLoad(modelPath, threads) + if h == 0 { + // la_capi_last_error needs a ctx; on a failed load we have none (it + // returns "" for a null ctx), so the text is best-effort. Surface it + // when present. + if msg := CapiLastError(0); msg != "" { + return fmt.Errorf("locate-anything-cpp: la_capi_load failed for %s: %s", modelPath, msg) + } + return fmt.Errorf("locate-anything-cpp: la_capi_load failed for %s", modelPath) + } + r.handle = h + return nil +} + +// Detect runs open-vocabulary detection on the base64-encoded image in opts.Src +// using the required text prompt in opts.Prompt, returning one pb.Detection per +// located object with its predicted label as ClassName. +func (r *LocateAnythingCpp) Detect(opts *pb.DetectOptions) (pb.DetectResponse, error) { + if r.handle == 0 { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: model not loaded") + } + + // Open-vocabulary detection is prompt-driven; without a prompt there is + // nothing to locate. + prompt := opts.Prompt + if prompt == "" { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: a text prompt is required (open-vocabulary detection)") + } + + // Decode base64 image and write to temp file. 
+ imgData, err := base64.StdEncoding.DecodeString(opts.Src) + if err != nil { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: failed to decode base64 image: %w", err) + } + + tmpFile, err := os.CreateTemp("", "locate-anything-*.img") + if err != nil { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: failed to create temp file: %w", err) + } + defer func() { _ = os.Remove(tmpFile.Name()) }() + + if _, err := tmpFile.Write(imgData); err != nil { + _ = tmpFile.Close() + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: failed to write temp file: %w", err) + } + if err := tmpFile.Close(); err != nil { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: failed to close temp file: %w", err) + } + + // mode 0 = hybrid (Parallel Box Decoding). 0 means failure (see CapiLocatePath); + // the JSON is otherwise unused (accessors give the boxes) but must be freed. + jsonPtr := CapiLocatePath(r.handle, tmpFile.Name(), prompt, 0) + if jsonPtr == 0 { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: la_capi_locate_path failed (last error: %q)", CapiLastError(r.handle)) + } + CapiFreeString(jsonPtr) + + n := CapiGetNDetections(r.handle) + if n < 0 { + return pb.DetectResponse{}, fmt.Errorf("locate-anything-cpp: invalid n_detections=%d", n) + } + + detections := make([]*pb.Detection, 0, n) + for i := int32(0); i < n; i++ { + var xyxy [4]float32 // x1, y1, x2, y2 + if CapiGetDetectionBox(r.handle, i, uintptr(unsafe.Pointer(&xyxy[0]))) != 0 { + continue + } + + // Two-call sizing for the label string. 
+ label := "" + need := CapiGetDetectionLabel(r.handle, i, 0, 0) + if need > 0 { + buf := make([]byte, need) + CapiGetDetectionLabel(r.handle, i, uintptr(unsafe.Pointer(&buf[0])), need) + label = string(buf[:need-1]) + } + + detections = append(detections, &pb.Detection{ + X: xyxy[0], + Y: xyxy[1], + Width: xyxy[2] - xyxy[0], + Height: xyxy[3] - xyxy[1], + Confidence: 1.0, + ClassName: label, + }) + } + + return pb.DetectResponse{ + Detections: detections, + }, nil +} diff --git a/backend/go/locate-anything-cpp/main.go b/backend/go/locate-anything-cpp/main.go new file mode 100644 index 000000000000..91ccaf38e436 --- /dev/null +++ b/backend/go/locate-anything-cpp/main.go @@ -0,0 +1,59 @@ +package main + +// main.go - entry point for the locate-anything-cpp gRPC backend. +// +// Dlopens liblocateanythingcpp-<variant>.so via purego at the path in +// LOCATEANYTHING_LIBRARY (set by run.sh based on /proc/cpuinfo), registers +// the la_capi_* C ABI symbols, then starts the gRPC server. + +import ( + "flag" + "os" + + "github.com/ebitengine/purego" + grpc "github.com/mudler/LocalAI/pkg/grpc" +) + +var ( + addr = flag.String("addr", "localhost:50051", "the address to connect to") +) + +type LibFuncs struct { + FuncPtr any + Name string +} + +func main() { + // Get library name from environment variable, default to fallback + libName := os.Getenv("LOCATEANYTHING_LIBRARY") + if libName == "" { + libName = "./liblocateanythingcpp-fallback.so" + } + + lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) + if err != nil { + panic(err) + } + + libFuncs := []LibFuncs{ + {&CapiLoad, "la_capi_load"}, + {&CapiFree, "la_capi_free"}, + {&CapiLocatePath, "la_capi_locate_path"}, + {&CapiLocateBuffer, "la_capi_locate_buffer"}, + {&CapiGetNDetections, "la_capi_get_n_detections"}, + {&CapiGetDetectionBox, "la_capi_get_detection_box"}, + {&CapiGetDetectionLabel, "la_capi_get_detection_label"}, + {&CapiFreeString, "la_capi_free_string"}, + {&CapiLastError, "la_capi_last_error"}, + } 
+ + for _, lf := range libFuncs { + purego.RegisterLibFunc(lf.FuncPtr, lib, lf.Name) + } + + flag.Parse() + + if err := grpc.StartServer(*addr, &LocateAnythingCpp{}); err != nil { + panic(err) + } +} diff --git a/backend/go/locate-anything-cpp/main_test.go b/backend/go/locate-anything-cpp/main_test.go new file mode 100644 index 000000000000..fba811c93e15 --- /dev/null +++ b/backend/go/locate-anything-cpp/main_test.go @@ -0,0 +1,176 @@ +package main + +// main_test.go - end-to-end smoke test for the locate-anything-cpp gRPC backend. +// +// Spawns the compiled locate-anything-cpp binary on a free local port, dials it +// via gRPC, and exercises LoadModel + Detect against the test fixtures +// downloaded by test.sh: the q8_0 GGUF of nvidia/LocateAnything-3B and a real +// COCO image with people + cars. Asserts that open-vocabulary detection driven +// by a text prompt returns at least one detection, each carrying a non-empty +// class name and a bounding box of non-zero size. +// +// The spec Skip()s cleanly if its fixtures (the ~6.3 GB model, the test image, +// the built binary, or the fallback .so) are missing, so the test target stays +// usable on a fresh checkout / on CI runners where the large model hasn't been +// downloaded. + +import ( + "context" + "encoding/base64" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + pb "github.com/mudler/LocalAI/pkg/grpc/proto" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func TestDetect(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "locate-anything-cpp backend smoke suite") +} + +// freePort grabs an ephemeral TCP port and immediately releases it so the +// spawned backend can bind to it. There is a tiny TOCTOU window here but in +// practice it's adequate for a smoke test on a quiet runner. 
+func freePort() int { + l, err := net.Listen("tcp", "127.0.0.1:0") + Expect(err).ToNot(HaveOccurred(), "freePort listen") + port := l.Addr().(*net.TCPAddr).Port + Expect(l.Close()).To(Succeed()) + return port +} + +// startBackend spawns the locate-anything-cpp binary on the given port and +// waits until it accepts TCP connections (up to 10s). It mirrors how main.go +// resolves the purego library: the LOCATEANYTHING_LIBRARY env var points the +// dlopen at the freshly built fallback .so, and the la_capi_* symbols are +// registered there. The returned cleanup func kills the process and reaps it. +func startBackend(port int) func() { + binary, err := filepath.Abs("./locate-anything-cpp") + Expect(err).ToNot(HaveOccurred()) + if _, err := os.Stat(binary); err != nil { + Skip(fmt.Sprintf("backend binary not built: %s (run `make locate-anything-cpp` first)", binary)) + } + + libPath, err := filepath.Abs("./liblocateanythingcpp-fallback.so") + Expect(err).ToNot(HaveOccurred()) + if _, err := os.Stat(libPath); err != nil { + Skip(fmt.Sprintf("fallback library not built: %s (run `make liblocateanythingcpp-fallback.so` first)", libPath)) + } + + addr := fmt.Sprintf("127.0.0.1:%d", port) + cmd := exec.Command(binary, "--addr", addr) + cmd.Env = append(os.Environ(), "LOCATEANYTHING_LIBRARY="+libPath) + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + Expect(cmd.Start()).To(Succeed()) + + cleanup := func() { + if cmd.Process != nil { + _ = cmd.Process.Kill() + _, _ = cmd.Process.Wait() + } + } + + deadline := time.Now().Add(10 * time.Second) + for time.Now().Before(deadline) { + c, err := net.DialTimeout("tcp", addr, 200*time.Millisecond) + if err == nil { + _ = c.Close() + return cleanup + } + time.Sleep(200 * time.Millisecond) + } + + cleanup() + Fail(fmt.Sprintf("backend did not become ready on %s within 10s", addr)) + return func() {} +} + +// loadTestImage reads the COCO test image downloaded by test.sh and returns its +// base64-encoded content (the wire format 
accepted by the Detect RPC). +func loadTestImage() string { + imgPath, err := filepath.Abs("test-data/test.jpg") + Expect(err).ToNot(HaveOccurred()) + imgBytes, err := os.ReadFile(imgPath) + if err != nil { + Skip(fmt.Sprintf("test image not present: %s (run test.sh first)", imgPath)) + } + return base64.StdEncoding.EncodeToString(imgBytes) +} + +// dialBackend opens a gRPC client connection to the spawned backend. +func dialBackend(port int) (pb.BackendClient, func()) { + addr := fmt.Sprintf("127.0.0.1:%d", port) + conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) + Expect(err).ToNot(HaveOccurred()) + return pb.NewBackendClient(conn), func() { _ = conn.Close() } +} + +// modelPathOrSkip resolves the model file under ./test-models/ and Skip()s the +// current spec if it's missing (the ~6.3 GB GGUF is not present on a fresh +// checkout / on CI runners without the download). +func modelPathOrSkip(name string) string { + modelDir, err := filepath.Abs("test-models") + Expect(err).ToNot(HaveOccurred()) + modelPath := filepath.Join(modelDir, name) + if _, err := os.Stat(modelPath); err != nil { + Skip(fmt.Sprintf("model not present: %s (run test.sh first)", modelPath)) + } + return modelPath +} + +var _ = Describe("locate-anything-cpp backend", func() { + It("runs open-vocabulary detection against a known-good COCO image", func() { + modelPath := modelPathOrSkip("locate-anything-q8_0.gguf") + imgB64 := loadTestImage() + + port := freePort() + cleanup := startBackend(port) + defer cleanup() + + client, closeConn := dialBackend(port) + defer closeConn() + + // The q8_0 model is ~6.3 GB and hybrid Parallel Box Decoding on CPU is + // not cheap, so give LoadModel + Detect a generous deadline. 
+ ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute) + defer cancel() + + loadResp, err := client.LoadModel(ctx, &pb.ModelOptions{ + Model: "locate-anything-q8_0.gguf", + ModelFile: modelPath, + Threads: 4, + }) + Expect(err).ToNot(HaveOccurred(), "LoadModel") + Expect(loadResp.GetSuccess()).To(BeTrue(), "LoadModel reported failure: %s", loadResp.GetMessage()) + + // Open-vocabulary detection is prompt-driven; the prompt names the + // classes to locate (people + cars), separated by the control token. + detResp, err := client.Detect(ctx, &pb.DetectOptions{ + Src: imgB64, + Prompt: "Locate all the instances that matches the following description: personcar.", + }) + Expect(err).ToNot(HaveOccurred(), "Detect") + Expect(detResp.GetDetections()).ToNot(BeEmpty(), "no detections returned on a known-good COCO image") + + _, _ = fmt.Fprintf(GinkgoWriter, "detection OK: %d detections\n", len(detResp.GetDetections())) + for i, d := range detResp.GetDetections() { + Expect(d.GetClassName()).ToNot(BeEmpty(), "detection %d has empty class_name", i) + Expect(d.GetWidth()).To(BeNumerically(">", float32(0)), + "detection %d has non-positive width", i) + Expect(d.GetHeight()).To(BeNumerically(">", float32(0)), + "detection %d has non-positive height", i) + _, _ = fmt.Fprintf(GinkgoWriter, " [%d] %s box=(%.1f,%.1f,%.1fx%.1f)\n", + i, d.GetClassName(), d.GetX(), d.GetY(), d.GetWidth(), d.GetHeight()) + } + }) +}) diff --git a/backend/go/locate-anything-cpp/package.sh b/backend/go/locate-anything-cpp/package.sh new file mode 100755 index 000000000000..3b1f13428c36 --- /dev/null +++ b/backend/go/locate-anything-cpp/package.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# Script to copy the appropriate libraries based on architecture + +set -e + +CURDIR=$(dirname "$(realpath $0)") +REPO_ROOT="${CURDIR}/../../.." 
+ +# Create lib directory +mkdir -p $CURDIR/package/lib + +cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ +cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/ +cp -fv $CURDIR/run.sh $CURDIR/package/ + +# Detect architecture and copy appropriate libraries +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + # x86_64 architecture + echo "Detected x86_64 architecture, copying x86_64 libraries..." + cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so + cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + # ARM64 architecture + echo "Detected ARM64 architecture, copying ARM64 libraries..." 
+ cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so + cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6 + cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 + cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 + cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 +elif [ $(uname -s) = "Darwin" ]; then + echo "Detected Darwin" +else + echo "Error: Could not detect architecture" + exit 1 +fi + +# Package GPU libraries based on BUILD_TYPE +GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh" +if [ -f "$GPU_LIB_SCRIPT" ]; then + echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..." 
+ source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib" + package_gpu_libs +fi + +echo "Packaging completed successfully" +ls -liah $CURDIR/package/ +ls -liah $CURDIR/package/lib/ diff --git a/backend/go/locate-anything-cpp/run.sh b/backend/go/locate-anything-cpp/run.sh new file mode 100755 index 000000000000..cefbff629463 --- /dev/null +++ b/backend/go/locate-anything-cpp/run.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -ex + +# Get the absolute current dir where the script is located +CURDIR=$(dirname "$(realpath "$0")") + +cd / + +echo "CPU info:" +if [ "$(uname)" != "Darwin" ]; then + grep -e "model\sname" /proc/cpuinfo | head -1 + grep -e "flags" /proc/cpuinfo | head -1 +fi + +LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so" + +if [ "$(uname)" != "Darwin" ]; then + if grep -q -e "\savx\s" /proc/cpuinfo ; then + echo "CPU: AVX found OK" + if [ -e "$CURDIR/liblocateanythingcpp-avx.so" ]; then + LIBRARY="$CURDIR/liblocateanythingcpp-avx.so" + fi + fi + + if grep -q -e "\savx2\s" /proc/cpuinfo ; then + echo "CPU: AVX2 found OK" + if [ -e "$CURDIR/liblocateanythingcpp-avx2.so" ]; then + LIBRARY="$CURDIR/liblocateanythingcpp-avx2.so" + fi + fi + + # Check avx 512 + if grep -q -e "\savx512f\s" /proc/cpuinfo ; then + echo "CPU: AVX512F found OK" + if [ -e "$CURDIR/liblocateanythingcpp-avx512.so" ]; then + LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so" + fi + fi +fi + +# Prepend the bundled libs; the previous value is appended only when non-empty, so no empty entry (which ld.so reads as the current directory) is added. +export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" +export LOCATEANYTHING_LIBRARY="$LIBRARY" + +# If there is a lib/ld.so, use it +if [ -f "$CURDIR/lib/ld.so" ]; then + echo "Using lib/ld.so" + echo "Using library: $LIBRARY" + exec "$CURDIR/lib/ld.so" "$CURDIR/locate-anything-cpp" "$@" +fi + +echo "Using library: $LIBRARY" +exec "$CURDIR/locate-anything-cpp" "$@" diff --git a/backend/go/locate-anything-cpp/test.sh b/backend/go/locate-anything-cpp/test.sh new file mode 100755 index 000000000000..0a088a593f3e --- /dev/null +++ b/backend/go/locate-anything-cpp/test.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -e + +CURDIR=$(dirname 
"$(realpath "$0")") + +echo "Running locate-anything-cpp backend tests..." + +# Test model from the mudler/locate-anything.cpp-gguf HuggingFace repo. This is +# the q8_0 quantization of nvidia/LocateAnything-3B (~6.3 GB), so the download +# is the slow step. It is fetched into a `.part` file (resumed with `curl -C -`) +# and skipped entirely if the final file is already present. +LOCATEANYTHING_MODEL_DIR="${LOCATEANYTHING_MODEL_DIR:-$CURDIR/test-models}" + +LOCATEANYTHING_MODEL_FILE="${LOCATEANYTHING_MODEL_FILE:-locate-anything-q8_0.gguf}" +LOCATEANYTHING_MODEL_URL="${LOCATEANYTHING_MODEL_URL:-https://huggingface.co/mudler/locate-anything.cpp-gguf/resolve/main/locate-anything-q8_0.gguf}" + +mkdir -p "$LOCATEANYTHING_MODEL_DIR" + +if [ ! -f "$LOCATEANYTHING_MODEL_DIR/$LOCATEANYTHING_MODEL_FILE" ]; then + echo "Downloading locate-anything q8_0 model (~6.3 GB, this is slow)..." + # Download to a .part file and rename only on success: -C - resumes an interrupted run, -f rejects HTTP error pages, and a truncated file is never mistaken for the complete model. + curl -fL -C - -o "$LOCATEANYTHING_MODEL_DIR/$LOCATEANYTHING_MODEL_FILE.part" "$LOCATEANYTHING_MODEL_URL" --progress-bar + mv "$LOCATEANYTHING_MODEL_DIR/$LOCATEANYTHING_MODEL_FILE.part" "$LOCATEANYTHING_MODEL_DIR/$LOCATEANYTHING_MODEL_FILE" +fi + +# Use a real COCO test image (people + cars) from the upstream rf-detr.cpp repo +# (~46 KB). Open-vocabulary detection needs real content to locate, so a +# synthetic image would trivially yield zero detections. +TEST_IMAGE_DIR="$CURDIR/test-data" +TEST_IMAGE_FILE="$TEST_IMAGE_DIR/test.jpg" +TEST_IMAGE_URL="${TEST_IMAGE_URL:-https://raw.githubusercontent.com/mudler/rf-detr.cpp/main/tests/fixtures/ci/test_image.jpg}" + +mkdir -p "$TEST_IMAGE_DIR" +if [ ! -f "$TEST_IMAGE_FILE" ]; then + echo "Downloading COCO test image..." + curl -fL -o "$TEST_IMAGE_FILE" "$TEST_IMAGE_URL" --progress-bar +fi + +echo "locate-anything-cpp test setup complete." +echo " model: $LOCATEANYTHING_MODEL_DIR/$LOCATEANYTHING_MODEL_FILE" +echo " test image: $TEST_IMAGE_FILE" + +# Run the Go smoke test: spawns the backend binary on a free port, calls +# LoadModel + Detect via gRPC against the downloaded GGUF + COCO image. 
+echo "" +echo "Running Go smoke test..." +cd "$CURDIR" +go test -v -timeout 30m ./... diff --git a/backend/index.yaml b/backend/index.yaml index 37e6890710e4..e641c935512b 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -337,6 +337,35 @@ nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp" intel: "intel-sycl-f32-rfdetr-cpp" vulkan: "vulkan-rfdetr-cpp" +- &locateanything + name: "locate-anything" + alias: "locate-anything" + license: apache-2.0 + description: | + Open-vocabulary object detection and visual grounding (NVIDIA + LocateAnything-3B) in C/C++ using GGML. Loads pre-built GGUF weights + and, given an image and a free-form text prompt, returns bounding + boxes, class labels, and confidence scores for the referred objects. + urls: + - https://github.com/mudler/locate-anything.cpp + - https://huggingface.co/nvidia/LocateAnything-3B + tags: + - object-detection + - visual-grounding + - open-vocabulary + - locate-anything + - gpu + - cpu + capabilities: + default: "cpu-locate-anything-cpp" + nvidia: "cuda12-locate-anything-cpp" + nvidia-cuda-12: "cuda12-locate-anything-cpp" + nvidia-cuda-13: "cuda13-locate-anything-cpp" + nvidia-l4t: "nvidia-l4t-arm64-locate-anything-cpp" + nvidia-l4t-cuda-12: "nvidia-l4t-arm64-locate-anything-cpp" + nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-locate-anything-cpp" + intel: "intel-sycl-f32-locate-anything-cpp" + vulkan: "vulkan-locate-anything-cpp" - &vllm name: "vllm" license: apache-2.0 diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go index dddcb1cffe0b..a897e037f513 100644 --- a/core/gallery/importers/importers.go +++ b/core/gallery/importers/importers.go @@ -158,6 +158,11 @@ var defaultImporters = []Importer{ // RFDetrImporter must run before TransformersImporter — RF-DETR // checkpoints may carry tokenizer-adjacent artefacts. 
&RFDetrImporter{}, + // LocateAnythingImporter (NVIDIA LocateAnything open-vocab detection, + // native C++/ggml port) must run before LlamaCPPImporter so its GGUF + // bundles aren't claimed by the generic .gguf importer; kept next to + // RFDetrImporter as both are detection models. + &LocateAnythingImporter{}, // Existing // DS4Importer must precede LlamaCPPImporter - ds4 weights are GGUFs and // would otherwise be claimed by the generic .gguf-handling llama-cpp diff --git a/core/gallery/importers/locate-anything.go b/core/gallery/importers/locate-anything.go new file mode 100644 index 000000000000..5dc87aa8843c --- /dev/null +++ b/core/gallery/importers/locate-anything.go @@ -0,0 +1,137 @@ +package importers + +import ( + "encoding/json" + "path/filepath" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/schema" + "go.yaml.in/yaml/v2" +) + +var _ Importer = &LocateAnythingImporter{} + +// LocateAnythingImporter routes NVIDIA LocateAnything open-vocabulary +// object-detection / visual-grounding repositories to the +// "locate-anything-cpp" backend (a native C++/ggml port). It must be +// registered BEFORE the generic GGUF matchers (LlamaCPPImporter) so its +// GGUF bundles aren't swallowed by the generic .gguf-handling importer, +// and alongside RFDetrImporter since both are detection models that may +// carry tokenizer-adjacent artefacts. +// +// Detection signals: +// - preferences.backend="locate-anything-cpp" (explicit override); +// - repo name contains "locate-anything" or "locateanything" +// (case-insensitive). 
+type LocateAnythingImporter struct{} + +func (i *LocateAnythingImporter) Name() string { return "locate-anything-cpp" } +func (i *LocateAnythingImporter) Modality() string { return "detection" } +func (i *LocateAnythingImporter) AutoDetects() bool { return true } + +// repoLooksLikeLocateAnything reports whether repo contains "locate-anything" or "locateanything", ignoring case. +// The ".cpp" / "-cpp" suffixed spellings need no separate check: they already contain "locate-anything". +func repoLooksLikeLocateAnything(repo string) bool { + lower := strings.ToLower(repo) + return strings.Contains(lower, "locate-anything") || + strings.Contains(lower, "locateanything") +} + +func (i *LocateAnythingImporter) Match(details Details) bool { + preferences, err := details.Preferences.MarshalJSON() + if err != nil { + return false + } + preferencesMap := make(map[string]any) + if len(preferences) > 0 { + if err := json.Unmarshal(preferences, &preferencesMap); err != nil { + return false + } + } + + if b, ok := preferencesMap["backend"].(string); ok && b == "locate-anything-cpp" { + return true + } + + if details.HuggingFace != nil { + repoName := details.HuggingFace.ModelID + if idx := strings.Index(repoName, "/"); idx >= 0 { + repoName = repoName[idx+1:] + } + if repoLooksLikeLocateAnything(repoName) { + return true + } + } + + // Fallback: hfapi recursion bug may leave HuggingFace nil — decide + // from the URI owner/repo. 
+ if _, repo, ok := HFOwnerRepoFromURI(details.URI); ok { + if repoLooksLikeLocateAnything(repo) { + return true + } + } + + return false +} + +func (i *LocateAnythingImporter) Import(details Details) (gallery.ModelConfig, error) { + preferences, err := details.Preferences.MarshalJSON() + if err != nil { + return gallery.ModelConfig{}, err + } + preferencesMap := make(map[string]any) + if len(preferences) > 0 { + if err := json.Unmarshal(preferences, &preferencesMap); err != nil { + return gallery.ModelConfig{}, err + } + } + + name, ok := preferencesMap["name"].(string) + if !ok { + name = filepath.Base(details.URI) + } + + description, ok := preferencesMap["description"].(string) + if !ok { + description = "Imported from " + details.URI + } + + // Prefer the canonical HF "owner/repo" identifier so the emitted + // YAML mirrors gallery locate-anything entries. + model := details.URI + if details.HuggingFace != nil && details.HuggingFace.ModelID != "" { + model = details.HuggingFace.ModelID + } else if owner, repo, ok := HFOwnerRepoFromURI(details.URI); ok { + model = owner + "/" + repo + } + + // Always the native C++/ggml backend; explicit preferences.backend + // overrides the default. 
+ backend := "locate-anything-cpp" + if b, ok := preferencesMap["backend"].(string); ok && b != "" { + backend = b + } + + modelConfig := config.ModelConfig{ + Name: name, + Description: description, + Backend: backend, + KnownUsecaseStrings: []string{"detection"}, + PredictionOptions: schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{Model: model}, + }, + } + + data, err := yaml.Marshal(modelConfig) + if err != nil { + return gallery.ModelConfig{}, err + } + + return gallery.ModelConfig{ + Name: name, + Description: description, + ConfigFile: string(data), + }, nil +} diff --git a/core/gallery/importers/locate-anything_test.go b/core/gallery/importers/locate-anything_test.go new file mode 100644 index 000000000000..1e4b60f7ccf8 --- /dev/null +++ b/core/gallery/importers/locate-anything_test.go @@ -0,0 +1,218 @@ +package importers_test + +import ( + "encoding/json" + "fmt" + + "github.com/mudler/LocalAI/core/gallery/importers" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("LocateAnythingImporter", func() { + Context("Importer interface metadata", func() { + It("exposes name/modality/autodetect", func() { + imp := &importers.LocateAnythingImporter{} + Expect(imp.Name()).To(Equal("locate-anything-cpp")) + Expect(imp.Modality()).To(Equal("detection")) + Expect(imp.AutoDetects()).To(BeTrue()) + }) + }) + + Context("Match", func() { + It("matches when backend preference is locate-anything-cpp", func() { + imp := &importers.LocateAnythingImporter{} + preferences := json.RawMessage(`{"backend": "locate-anything-cpp"}`) + details := importers.Details{ + URI: "https://example.com/some-model", + Preferences: preferences, + } + + Expect(imp.Match(details)).To(BeTrue()) + }) + + It("matches when the repo name contains 'locate-anything' (case-insensitive)", func() { + imp := &importers.LocateAnythingImporter{} + details := importers.Details{ + URI: "https://huggingface.co/mudler/locate-anything-cpp-3b", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "mudler/Locate-Anything-CPP-3B", + Author: "mudler", + }, + } + + Expect(imp.Match(details)).To(BeTrue()) + }) + + It("matches when the repo name contains 'locateanything' (case-insensitive)", func() { + imp := &importers.LocateAnythingImporter{} + details := importers.Details{ + URI: "https://huggingface.co/nvidia/LocateAnything-3B", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "nvidia/LocateAnything-3B", + Author: "nvidia", + }, + } + + Expect(imp.Match(details)).To(BeTrue()) + }) + + It("matches via URI fallback when HuggingFace details are missing", func() { + imp := &importers.LocateAnythingImporter{} + details := importers.Details{ + URI: "https://huggingface.co/nvidia/LocateAnything-3B", + } + + Expect(imp.Match(details)).To(BeTrue()) + }) + + It("does not match unrelated repos without locate-anything signals", func() { + imp := &importers.LocateAnythingImporter{} + details := importers.Details{ + URI: 
"https://huggingface.co/meta-llama/Llama-3-8B", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "meta-llama/Llama-3-8B", + Author: "meta-llama", + }, + } + + Expect(imp.Match(details)).To(BeFalse()) + }) + + It("does not match an rfdetr repo", func() { + imp := &importers.LocateAnythingImporter{} + details := importers.Details{ + URI: "https://huggingface.co/mudler/rfdetr-cpp-nano", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "mudler/rfdetr-cpp-nano", + Author: "mudler", + }, + } + + Expect(imp.Match(details)).To(BeFalse()) + }) + + It("returns false for invalid preferences JSON", func() { + imp := &importers.LocateAnythingImporter{} + preferences := json.RawMessage(`not valid json`) + details := importers.Details{ + URI: "https://example.com/model", + Preferences: preferences, + } + + Expect(imp.Match(details)).To(BeFalse()) + }) + }) + + Context("Import", func() { + It("produces a YAML with backend locate-anything-cpp and the repo as the model", func() { + imp := &importers.LocateAnythingImporter{} + details := importers.Details{ + URI: "https://huggingface.co/nvidia/LocateAnything-3B", + HuggingFace: &hfapi.ModelDetails{ + ModelID: "nvidia/LocateAnything-3B", + Author: "nvidia", + }, + } + + modelConfig, err := imp.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: locate-anything-cpp"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("nvidia/LocateAnything-3B"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("detection"), fmt.Sprintf("Model config: %+v", modelConfig)) + }) + + It("respects custom name and description from preferences", func() { + imp := &importers.LocateAnythingImporter{} + preferences := json.RawMessage(`{"name": "my-locate", "description": "Custom"}`) + details := importers.Details{ + URI: "https://huggingface.co/nvidia/LocateAnything-3B", + Preferences: preferences, + 
HuggingFace: &hfapi.ModelDetails{ + ModelID: "nvidia/LocateAnything-3B", + Author: "nvidia", + }, + } + + modelConfig, err := imp.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("my-locate")) + Expect(modelConfig.Description).To(Equal("Custom")) + }) + }) + + // Table-driven coverage of the backend routing: locate-anything repos + // always route to the native locate-anything-cpp backend, with an + // explicit preferences.backend override honoured. + // + // Cases are kept offline-deterministic by injecting Details directly + // rather than going through DiscoverModelConfig (which would hit live HF). + Context("backend routing (offline)", func() { + hfFile := func(path string) hfapi.ModelFile { + return hfapi.ModelFile{Path: path} + } + + type tc struct { + name string + uri string + modelID string + files []hfapi.ModelFile + prefs string + expectBackend string // expected `backend:` line content + } + + entries := []tc{ + { + name: "canonical NVIDIA repo routes to locate-anything-cpp", + uri: "https://huggingface.co/nvidia/LocateAnything-3B", + modelID: "nvidia/LocateAnything-3B", + files: []hfapi.ModelFile{hfFile("locate-anything-3b-q8_0.gguf"), hfFile("README.md")}, + prefs: "", + expectBackend: "backend: locate-anything-cpp", + }, + { + name: "GGUF bundle with locate-anything name routes to locate-anything-cpp", + uri: "https://huggingface.co/mudler/locate-anything.cpp-3b", + modelID: "mudler/locate-anything.cpp-3b", + files: []hfapi.ModelFile{hfFile("model-f16.gguf")}, + prefs: "", + expectBackend: "backend: locate-anything-cpp", + }, + { + name: "explicit preferences.backend override is honoured", + uri: "https://huggingface.co/nvidia/LocateAnything-3B", + modelID: "nvidia/LocateAnything-3B", + files: nil, + prefs: `{"backend": "locate-anything-cpp"}`, + expectBackend: "backend: locate-anything-cpp", + }, + } + + for _, e := range entries { + e := e // capture for closure + It(e.name, func() { + imp := 
&importers.LocateAnythingImporter{} + details := importers.Details{ + URI: e.uri, + HuggingFace: &hfapi.ModelDetails{ + ModelID: e.modelID, + Files: e.files, + }, + } + if e.prefs != "" { + details.Preferences = json.RawMessage(e.prefs) + } + + Expect(imp.Match(details)).To(BeTrue(), fmt.Sprintf("Match should fire for %+v", details)) + + modelConfig, err := imp.Import(details) + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Import error: %v", err)) + Expect(modelConfig.ConfigFile).To(ContainSubstring(e.expectBackend), + fmt.Sprintf("Model config: %+v", modelConfig)) + }) + } + }) +}) diff --git a/gallery/index.yaml b/gallery/index.yaml index 9d03a98a9bad..447b1fb6ca3d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6685,6 +6685,37 @@ - filename: rfdetr-nano-q8_0.gguf uri: huggingface://mudler/rfdetr-cpp-nano/rfdetr-nano-q8_0.gguf sha256: 940084c60a780f1a19a51458ae3a601454b3b843675fa0713ff43ae5bccc0d9b +- name: locate-anything-3b + url: github:mudler/LocalAI/gallery/virtual.yaml@master + urls: + - https://github.com/mudler/locate-anything.cpp + - https://huggingface.co/nvidia/LocateAnything-3B + - https://huggingface.co/mudler/locate-anything.cpp-gguf + description: | + NVIDIA LocateAnything-3B open-vocabulary object detection (visual grounding), served via the native + locate-anything.cpp backend (C++/ggml + purego, no Python). Describe what to find in a text prompt and + get labeled boxes back; separate multiple categories with . Q8_0 is the recommended default: + box-identical to F16/F32, ~6.3GB, fastest CPU latency. Drop-in for the /v1/detection endpoint (pass the + prompt). 
+ license: other + icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 + tags: + - object-detection + - open-vocabulary + - locate-anything + - native + - cpp + - cpu + overrides: + backend: locate-anything-cpp + known_usecases: + - detection + parameters: + model: locate-anything-q8_0.gguf + files: + - filename: locate-anything-q8_0.gguf + uri: huggingface://mudler/locate-anything.cpp-gguf/locate-anything-q8_0.gguf + sha256: 0909d8a1aba584b482d501baae032611d1559878be1b7f6606ba516687c5380d - name: rfdetr-cpp-base url: github:mudler/LocalAI/gallery/virtual.yaml@master urls: