Commit a7baf8f

Merge pull request #7 from waggle-sensor/cloudbench
Cloudbench
2 parents 19d4e5a + 7f8d9e4 commit a7baf8f

File tree: 19 files changed, +15691 −10 lines changed

Readme.md

Lines changed: 8 additions & 2 deletions
```diff
@@ -159,6 +159,10 @@ kubectl kustomize nrp-dev -o sage-image-search-dev.yaml or kubectl kustomize nrp
   - https://huggingface.co/datasets/sagecontinuum/INQUIRE-Benchmark-small
   - https://huggingface.co/datasets/sagecontinuum/FireBench
   - ...
+- [ ] look into using text encoders only to see if just using caption-query comparisons can be enough or improve retrieval with embeddings. Essentially the image will NOT be embedded in the same vector space as the captions anymore.
+  - embeddinggemma model: https://huggingface.co/google/embeddinggemma-300m
+  - E5-mistral-7b-instruct: https://huggingface.co/intfloat/e5-mistral-7b-instruct
+    - this is hosted by NRP so it will be easy to use.
 - [ ] Benchmark Milvus@NRP
   - using...
   - https://huggingface.co/datasets/sagecontinuum/INQUIRE-Benchmark-small
@@ -167,6 +171,10 @@ kubectl kustomize nrp-dev -o sage-image-search-dev.yaml or kubectl kustomize nrp
 - [ ] switch to reranking with Clip DFN5B-CLIP-ViT-H-14-378
   - before making the switch permanent run the benchmarking suite to see if there are any regressions
   - firebench results show that it is better than the current reranker model (ms-marco-MiniLM-L6-v2)
+- [ ] look into MMR (maximal marginal relevance) to see if it can improve the reranking performance or to implement it as a "toggle" to apply it only to certain queries.
+  - https://milvus.io/ai-quick-reference/how-is-diversity-in-search-results-achieved
+- [ ] Integrate ShieldGemma 2 to implement policies and mark images as yes/no if the image violates the policy
+  - [ShieldGemma 2 Model Card](https://ai.google.dev/gemma/docs/shieldgemma/model_card_2)
 - [ ] add a heartbeat metric for Sage Object Storage (nrdstor)
   - specifically here in the code: https://github.com/waggle-sensor/sage-nrp-image-search/blob/main/weavloader/processing.py#L159
 - [ ] add a metric to count the images that have been indexed into the vectordb
@@ -218,6 +226,4 @@ kubectl kustomize nrp-dev -o sage-image-search-dev.yaml or kubectl kustomize nrp
 - Incremental Update Latency
   - Time between new image upload and being searchable
 - examples here: https://chatgpt.com/c/684b1286-1144-8003-8a20-85a1045375c3
-- [ ] Integrate ShieldGemma 2 to implement policies and mark images as yes/no if the image violates the policy
-  - [ShieldGemma 2 Model Card](https://ai.google.dev/gemma/docs/shieldgemma/model_card_2)
 - [ ] turn on batching for triton and utilize it in weavloader
```
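The MMR idea on the todo list above (greedily pick results that are relevant to the query but dissimilar to results already selected) can be sketched in a few lines. This is a generic illustration over plain vectors, not Milvus or Weaviate API code; the `lambda_` trade-off parameter is an assumption:

```python
import math

def cosine(a, b):
    """Cosine similarity between two equal-length float vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb) if na and nb else 0.0

def mmr(query_vec, candidates, k, lambda_=0.7):
    """Maximal marginal relevance: candidates is a list of (id, vector);
    returns k ids, each chosen to maximize
    lambda_ * relevance(query) - (1 - lambda_) * redundancy(selected)."""
    selected, remaining = [], list(candidates)
    while remaining and len(selected) < k:
        best, best_score = None, -float("inf")
        for cid, vec in remaining:
            relevance = cosine(query_vec, vec)
            # redundancy = similarity to the closest already-selected result
            redundancy = max((cosine(vec, sv) for _, sv in selected), default=0.0)
            score = lambda_ * relevance - (1 - lambda_) * redundancy
            if score > best_score:
                best, best_score = (cid, vec), score
        selected.append(best)
        remaining.remove(best)
    return [cid for cid, _ in selected]
```

With a low `lambda_`, a near-duplicate of an already-selected result is demoted below a less relevant but diverse one, which is the "toggle" behavior the todo item describes.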
Lines changed: 24 additions & 0 deletions
```dockerfile
# CloudBench Benchmark Job Dockerfile
# Combined Dockerfile for running both data loading and evaluation

ARG PYTHON_VERSION=3.11-slim
FROM python:${PYTHON_VERSION}

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    procps \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Run combined benchmark script
CMD ["python", "run_benchmark.py"]
```
Lines changed: 29 additions & 0 deletions
```makefile
# CloudBench Benchmark Makefile
# This file sets CloudBench-specific variables and includes the base benchmarking Makefile

# ============================================================================
# Required Variables (must be set for base Makefile)
# ============================================================================
BENCHMARK_NAME := cloudbench
DOCKERFILE_JOB := Dockerfile.job
RESULTS_FILES := image_search_results.csv query_eval_metrics.csv
ENV ?= dev
ifeq ($(ENV),prod)
KUSTOMIZE_DIR := ../../kubernetes/Cloudbench/nrp-prod
else
KUSTOMIZE_DIR := ../../kubernetes/Cloudbench/nrp-dev
endif

# ============================================================================
# Optional Variables (can be overridden)
# ============================================================================
KUBECTL_NAMESPACE := sage
KUBECTL_CONTEXT ?= nautilus
REGISTRY := gitlab-registry.nrp-nautilus.io/ndp/sage/nrp-image-search
JOB_TAG ?= latest

# Local run script
RUN_SCRIPT := run_benchmark.py

# Include the base Makefile (after setting variables)
include ../Makefile
```
Lines changed: 90 additions & 0 deletions
# CloudBench Benchmark

This benchmark uses [CloudBench](https://huggingface.co/datasets/sagecontinuum/CloudBench) with Weaviate as the vector database for evaluating text-to-image retrieval in cloud and atmospheric science. CloudBench is a benchmark dataset for cloud image retrieval: natural language queries paired with images and binary relevance labels.

## Dataset

- **Source**: [sagecontinuum/CloudBench](https://huggingface.co/datasets/sagecontinuum/CloudBench) on Hugging Face
- **Contents**: Query–image pairs with relevance labels (0 = not relevant, 1 = relevant), plus metadata (cloud_coverage, viewpoint, lighting, confounder_type, occlusion_present, multiple_cloud_types, horizon_visible, ground_visible, sun_visible, precipitation_visible, overcast, multiple_layers, storm_visible)
- **Split**: The dataset provides a single `train` split (~4.6k rows)

## Usage

This benchmark is intended to be used with [Sage Image Search](../../../kubernetes/base/). The Makefile references components deployed there and runs the CloudBench benchmark job.

## Running the Benchmark

### Prerequisites

- **Kubernetes cluster** access with `kubectl` configured
- **kustomize** (or kubectl with kustomize support)
- **Docker** for building images
- **Weaviate and Triton** deployed (e.g. from `kubernetes/nrp-dev` or `kubernetes/nrp-prod`)

### Steps

1. **Deploy Sage Image Search infrastructure** (from the main `kubernetes` directory):
   ```bash
   kubectl apply -k nrp-dev   # or nrp-prod
   ```

2. **Build and push the benchmark image**:
   ```bash
   cd benchmarking/benchmarks/Cloudbench
   make build
   docker push <registry>/benchmark-cloudbench-job:latest
   ```

3. **Run the CloudBench benchmark** (loads data and evaluates):
   ```bash
   make run    # defaults to dev environment
   make logs   # monitor progress
   ```
   This loads `sagecontinuum/CloudBench` into Weaviate, runs the evaluation, and saves results.

4. **Run locally (development)**:
   ```bash
   make run-local
   ```
   Uses port-forwarding to Weaviate and Triton.

### Results

After a run, three files are produced:

- **`image_search_results.csv`**: Metadata of the images returned for each query
- **`query_eval_metrics.csv`**: Evaluation metrics (NDCG, precision, recall, etc.) per query
- **`config_values.csv`**: Configuration used for the run (`config.to_csv()`)

Results are written to `/app/results` in Kubernetes (with a volume mount) or to the current directory when using `make run-local`. Optional S3 upload uses paths like `{S3_PREFIX}/{timestamp}/…`.
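`query_eval_metrics.csv` includes NDCG per query. For binary relevance labels like CloudBench's, NDCG@k reduces to the standard formula below; this is a minimal sketch of that formula, not the actual imsearch_eval implementation:

```python
import math

def ndcg_at_k(relevances, k):
    """NDCG@k for binary labels given in ranked order, e.g. [1, 0, 1].
    DCG discounts each relevant hit by log2 of its rank; dividing by the
    ideal DCG (all relevant items ranked first) normalizes to [0, 1]."""
    def dcg(labels):
        return sum(rel / math.log2(i + 2) for i, rel in enumerate(labels))
    ideal = dcg(sorted(relevances, reverse=True)[:k])
    return dcg(relevances[:k]) / ideal if ideal > 0 else 0.0
```

A perfect ranking scores 1.0; pushing the only relevant image from rank 1 to rank 2 drops NDCG@2 to about 0.63.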
## Environment Variables

- **CLOUDBENCH_DATASET**: HuggingFace dataset name (default: `sagecontinuum/CloudBench`)
- **COLLECTION_NAME**: Weaviate collection name (default: `CloudBench`)
- **SAMPLE_SIZE**: Number of samples (0 = use full dataset)
- **SEED**, **HF_TOKEN**, **WORKERS**, **IMAGE_BATCH_SIZE**, **QUERY_BATCH_SIZE**: Data and processing
- **QUERY_METHOD**, **TARGET_VECTOR**, **RESPONSE_LIMIT**: Query and retrieval
- See `config.py` for the full list (Weaviate, Triton, S3, etc.).
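With `QUERY_METHOD=clip_hybrid_query`, an alpha weight (`QUERY_ALPHA` in `config.py`) blends the vector score against the keyword score. As a toy illustration of that kind of score fusion, with min-max normalization as a simplifying assumption (Weaviate's actual hybrid fusion differs in detail):

```python
def normalize(scores):
    """Min-max normalize a list of scores to [0, 1]."""
    lo, hi = min(scores), max(scores)
    return [1.0 if hi == lo else (s - lo) / (hi - lo) for s in scores]

def hybrid_scores(vector_scores, keyword_scores, alpha=0.4):
    """Blend per-result vector and keyword (BM25-style) scores.
    alpha=1.0 -> pure vector search; alpha=0.0 -> pure keyword search."""
    v, kw = normalize(vector_scores), normalize(keyword_scores)
    return [alpha * a + (1 - alpha) * b for a, b in zip(v, kw)]
```

At the default `alpha=0.4`, keyword (caption) matches carry slightly more weight than the vector similarity.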
## Citation

If you use CloudBench, cite the dataset:

```bibtex
@misc{cloudbench_2026,
  author      = { Sage Continuum and Francisco Lozano },
  affiliation = { Northwestern University },
  title       = { CloudBench },
  year        = 2026,
  url         = { https://huggingface.co/datasets/sagecontinuum/CloudBench },
  doi         = { 10.57967/hf/7784 },
  publisher   = { Hugging Face }
}
```

## References

- [CloudBench on Hugging Face](https://huggingface.co/datasets/sagecontinuum/CloudBench)
- [Weaviate: NDCG and retrieval evaluation](https://weaviate.io/blog/retrieval-evaluation-metrics#normalized-discounted-cumulative-gain-ndcg)
- [imsearch_eval](https://github.com/waggle-sensor/imsearch_eval) framework
Lines changed: 36 additions & 0 deletions
```python
"""CloudBench benchmark dataset implementation."""
from imsearch_eval.adapters.huggingface import HuggingFaceDataset


class CloudBench(HuggingFaceDataset):
    """Benchmark dataset class for CloudBench (cloud/atmospheric image retrieval)."""

    def get_query_column(self) -> str:
        """Get the name of the column containing the query text."""
        return "query_text"

    def get_query_id_column(self) -> str:
        """Get the name of the column containing the query ID."""
        return "query_id"

    def get_relevance_column(self) -> str:
        """Get the name of the column containing relevance labels (0=not relevant, 1=relevant)."""
        return "relevance_label"

    def get_metadata_columns(self) -> list:
        """Get optional metadata columns to include in evaluation stats."""
        return [
            "cloud_coverage",
            "viewpoint",
            "lighting",
            "confounder_type",
            "occlusion_present",
            "multiple_cloud_types",
            "horizon_visible",
            "ground_visible",
            "sun_visible",
            "precipitation_visible",
            "overcast",
            "multiple_layers",
            "storm_visible",
        ]
```
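The metadata columns exposed above let evaluation stats be sliced per attribute (e.g. comparing precision for overcast vs. clear scenes). A sketch of per-slice precision over result rows; the flat dict row shape here is a hypothetical illustration, not the framework's actual output format:

```python
from collections import defaultdict

def precision_by(rows, column):
    """Compute precision per value of a metadata column.
    rows: dicts carrying a binary 'relevance_label' plus metadata fields."""
    hits, totals = defaultdict(int), defaultdict(int)
    for row in rows:
        key = row[column]
        totals[key] += 1                     # results retrieved in this slice
        hits[key] += row["relevance_label"]  # of which relevant
    return {key: hits[key] / totals[key] for key in totals}
```

Slicing like this is what makes confounder-style columns (`confounder_type`, `occlusion_present`) useful: a retrieval regression often shows up in one slice before it moves the aggregate metric.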
Lines changed: 131 additions & 0 deletions
```python
"""CloudBench-specific configuration/hyperparameters."""

import os
from weaviate.classes.config import VectorDistances, Configure
from weaviate.collections.classes.config_vector_index import VectorFilterStrategy

from imsearch_eval.framework.interfaces import Config


class CloudBenchConfig(Config):
    """Configuration for CloudBench benchmark (cloud/atmospheric image retrieval)."""

    def __init__(self):
        """Initialize CloudBench configuration."""
        # dataset parameters
        self.cloudbench_dataset = os.environ.get(
            "CLOUDBENCH_DATASET", "sagecontinuum/CloudBench"
        )
        self.sample_size = int(os.environ.get("SAMPLE_SIZE", 0))
        self.seed = int(os.environ.get("SEED", 42))
        self._hf_token = os.environ.get("HF_TOKEN", "")

        # Upload parameters
        self._upload_to_s3 = os.environ.get("UPLOAD_TO_S3", "false").lower() == "true"
        self._s3_bucket = os.environ.get("S3_BUCKET", "sage_imsearch")
        self._s3_prefix = os.environ.get("S3_PREFIX", "dev-metrics/cloudbench")
        self._s3_endpoint = os.environ.get(
            "S3_ENDPOINT", "http://rook-ceph-rgw-nautiluss3.rook"
        )
        self._s3_access_key = os.environ.get("S3_ACCESS_KEY", "")
        self._s3_secret_key = os.environ.get("S3_SECRET_KEY", "")
        self._s3_secure = os.environ.get("S3_SECURE", "false").lower() == "true"
        self._image_results_file = os.environ.get(
            "IMAGE_RESULTS_FILE", "image_search_results.csv"
        )
        self._query_eval_metrics_file = os.environ.get(
            "QUERY_EVAL_METRICS_FILE", "query_eval_metrics.csv"
        )
        self._config_values_file = os.environ.get(
            "CONFIG_VALUES_FILE", "config_values.csv"
        )

        # Weaviate parameters
        self._weaviate_host = os.environ.get("WEAVIATE_HOST", "127.0.0.1")
        self._weaviate_port = os.environ.get("WEAVIATE_PORT", "8080")
        self._weaviate_grpc_port = os.environ.get("WEAVIATE_GRPC_PORT", "50051")
        self._collection_name = os.environ.get("COLLECTION_NAME", "CloudBench")

        # model provider parameters
        self._llm_model_provider = os.environ.get(
            "LLM_MODEL_PROVIDER", "triton"
        ).lower()

        # Triton parameters
        self._triton_host = os.environ.get("TRITON_HOST", "triton")
        self._triton_port = os.environ.get("TRITON_PORT", "8001")

        # Workers parameters
        self._workers = int(os.environ.get("WORKERS", 5))
        self._image_batch_size = int(os.environ.get("IMAGE_BATCH_SIZE", 25))
        self._query_batch_size = int(os.environ.get("QUERY_BATCH_SIZE", 5))

        # Logging parameters
        self._log_level = os.environ.get("LOG_LEVEL", "INFO").upper()

        # Weaviate HNSW hyperparameters
        self.hnsw_dist_metric = getattr(
            VectorDistances, os.environ.get("HNSW_DIST_METRIC", "COSINE").upper()
        )
        self.hnsw_ef = int(os.environ.get("HNSW_EF", -1))
        self.hnsw_ef_construction = int(os.environ.get("HNSW_EF_CONSTRUCTION", 100))
        self.hnsw_maxConnections = int(os.environ.get("HNSW_MAX_CONNECTIONS", 50))
        self.hsnw_dynamicEfMax = int(os.environ.get("HNSW_DYNAMIC_EF_MAX", 500))
        self.hsnw_dynamicEfMin = int(os.environ.get("HNSW_DYNAMIC_EF_MIN", 200))
        self.hnsw_ef_factor = int(os.environ.get("HNSW_EF_FACTOR", 20))
        self.hsnw_filterStrategy = getattr(
            VectorFilterStrategy,
            os.environ.get("HNSW_FILTER_STRATEGY", "ACORN").upper(),
        )
        self.hnsw_flatSearchCutoff = int(
            os.environ.get("HNSW_FLAT_SEARCH_CUTOFF", 40000)
        )
        self.hnsw_vector_cache_max_objects = int(
            os.environ.get("HNSW_VECTOR_CACHE_MAX_OBJECTS", 1e12)
        )
        self.hnsw_quantizer = Configure.VectorIndex.Quantizer.pq(
            training_limit=int(
                os.environ.get("HNSW_QUANTIZER_TRAINING_LIMIT", 500000)
            )
        )

        # Query parameters
        self.query_method = os.environ.get("QUERY_METHOD", "clip_hybrid_query")
        self.target_vector = os.environ.get("TARGET_VECTOR", "clip")
        self.response_limit = int(os.environ.get("RESPONSE_LIMIT", 50))
        self.advanced_query_parameters = {
            "alpha": float(os.environ.get("QUERY_ALPHA", 0.4)),
            "query_properties": ["caption"],
            "autocut_jumps": int(os.environ.get("AUTOCUT_JUMPS", 0)),
            "rerank_prop": os.environ.get("RERANK_PROP", "caption"),
            "clip_alpha": float(os.environ.get("CLIP_ALPHA", 0.7)),
        }

        # Caption prompts (same as Firebench)
        default_prompt = """
role:
You are a world-class Scientific Image Captioning Expert.

context:
You will be shown a scientific image captured by edge devices. Your goal is to analyze its content and significance in detail.

task:
Generate exactly one scientifically detailed caption that accurately describes what is visible in the image and its scientific relevance.
Make it as detailed as possible. Also extract text and numbers from the images.

constraints:
- Only return:
  1. A single caption.
  2. A list of 15 keywords relevant to the image.
- Do not include any additional text, explanations, or formatting.

format:
caption: <your_scientific_caption_here>
keywords: <keyword1>, <keyword2>, ...
"""
        self.gemma3_prompt = os.environ.get("GEMMA3_PROMPT", default_prompt)

    @staticmethod
    def is_nrp_key_set():
        """Check if NRP API key is set."""
        if os.environ.get("NRP_API_KEY", "") == "":
            raise ValueError("NRP_API_KEY is not set")
```
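The `getattr(VectorDistances, ...)` pattern in the config above raises a bare `AttributeError` when an env var holds a typo. A defensive variant is sketched below; `VectorDistance` here is a stand-in enum for illustration, not the real Weaviate class, and `env_enum` is a hypothetical helper:

```python
import enum
import os

class VectorDistance(enum.Enum):
    """Stand-in for a library enum such as weaviate's VectorDistances."""
    COSINE = "cosine"
    DOT = "dot"
    L2_SQUARED = "l2-squared"

def env_enum(name, enum_cls, default):
    """Resolve an environment string to an enum member, raising a clear
    error that lists the valid names instead of a bare AttributeError."""
    value = os.environ.get(name, default).upper().replace("-", "_")
    try:
        return enum_cls[value]
    except KeyError:
        valid = ", ".join(m.name for m in enum_cls)
        raise ValueError(f"{name}={value!r} is not one of: {valid}")
```

The same guard applies to the `HNSW_FILTER_STRATEGY` lookup, where a misspelled value would otherwise only fail at config-construction time with an unhelpful message.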
