Merge pull request #667 from danactive/moveRaw

danactive · web-flow · commit c65c655a17c6 · 2025-06-23T17:46:25.000-07:00
Move raw
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -51,7 +51,7 @@ jobs:
         run: npm run typecheck --if-present
 
   unittest:
-    name: Jest
+    name: Unit tests
     runs-on: ubuntu-latest
     steps:
       - name: ⬇️ Checkout repo
diff --git a/Makefile b/Makefile
@@ -33,7 +33,8 @@ build-ai-api:
 ai-api:
 	# OpenAI model stores in ~/.cache/clip
 	docker run --rm --name ai-api -p 8080:8080 \
-		-v $(HOME)/.cache/clip:/root/.cache/clip
+		-v $(HOME)/.cache/clip:/root/.cache/clip \
+		ai-api
 
 build-test:
 	docker build  -f apps/api/Dockerfile --build-arg INSTALL_TEST=true -t ai-api-test .
diff --git a/apps/api/README.md b/apps/api/README.md
@@ -1,45 +1,91 @@
-# 🌿 iNaturalist 2021 Image Classifier (FastAPI + EVA-CLIP)
+# 🌿 iNaturalist 2021 Image Classifier & LAION Aesthetic Scoring API
 
-This project serves a high-accuracy image classification API using a **Vision Transformer** model fine-tuned on the **iNaturalist 2021** biodiversity dataset. It supports top-k prediction and an optional debug mode with detailed logits, scores, and resized input images.
+This project provides a robust FastAPI-based backend for two advanced computer vision endpoints:
 
-## 🧠 Model Details
+- **Biodiversity Image Classification** using a fine-tuned Vision Transformer (ViT) on the iNaturalist 2021 dataset.
+- **Aesthetic Scoring** using the LAION regression head on OpenAI CLIP ViT-B/16 features.
 
-- **Model Family**: [`timm`](https://github.com/rwightman/pytorch-image-models)
-- **Model Name**: `eva02_large_patch14_clip_336.merged2b_ft_inat21`
-- **Source**: Hugging Face Hub via `hf-hub:timm/eva02_large_patch14_clip_336.merged2b_ft_inat21`
-- **Architecture**: Vision Transformer (EVA-CLIP backbone)
-- **Pretraining**: Internally pre-trained CLIP-like architecture
-- **Fine-tuned On**: iNaturalist 2021 (10,000+ species of plants, animals, fungi, and microbes)
-- **Output Classes**: Mapped using `inat21_class_index.json`
-- **Label URL**: Provided via `model.default_cfg['label_url']`
+Both endpoints support raw image uploads and leverage state-of-the-art models for their respective tasks while keeping our data private.
 
-## 🖼️ Input Format
+---
 
-- Accepts raw image bytes (e.g., `image/jpeg`, `image/png`)
-- Auto-converted to RGB using Pillow
-- Resized to 384x384, then center cropped to 336x336
-- Normalized using CLIP-style mean and std values:
-	- `mean = [0.48145466, 0.4578275, 0.40821073]`
-	- `std = [0.26862954, 0.26130258, 0.27577711]`
+## 🧠 Model Details
 
-## CLI commands
-- `make build-ai-api`
-- `make ai-api`
+### 1. Biodiversity Classifier
 
+- **Model Family:** [`timm`](https://github.com/rwightman/pytorch-image-models)
+- **Model Name:** `eva02_large_patch14_clip_336.merged2b_ft_inat21`
+- **Source:** Hugging Face Hub
+- **Architecture:** Vision Transformer (EVA-CLIP backbone)
+- **Fine-tuned On:** iNaturalist 2021 (10,000+ species)
+- **Output Classes:** Mapped using `inat21_class_index.json`
 
-# Aesthetic Scoring
+### 2. Aesthetic Scoring
 
-This project provides an API for **aesthetic scoring** of images using a regression head trained on top of OpenAI's CLIP ViT-B/16 backbone.
+- **Backbone:** OpenAI CLIP ViT-B/16
+- **Regression Head:** Multilayer Perceptron (MLP) trained for aesthetic prediction ([LAION aesthetic predictor](https://github.com/LAION-AI/aesthetic-predictor))
+- **Head Weights:** `models/aesthetic/sa_0_4_vit_b_16_linear.pth`
+- **Feature Dimension:** 512
 
 ---
 
-## 🧠 Model Details
+## 🚀 API Endpoints
 
-- **Backbone:** OpenAI CLIP ViT-B/16
-- **Regression Head:** Multilayer Perceptron (MLP) trained for aesthetic prediction
-- **Head Weights:** `models/aesthetic/sa_0_4_vit_b_16_linear.pth`
-- **Feature Dimension:** 512
+### 1. `/classify` — Biodiversity Image Classification
 
+**Description:**
+Predicts the top-3 most likely species for a given image using a ViT model fine-tuned on iNaturalist 2021.
+
+**Request:**
+- **Method:** `POST`
+- **Content-Type:** `image/jpeg` or `image/png`
+- **Body:** Raw image bytes
+
+**Example (using curl):**
+```sh
+curl -X POST -H "Content-Type: image/jpeg" --data-binary @your_image.jpg http://localhost:8080/classify
+```
+
+**Response:**
+- **Status:** 200 OK
+- **Content-Type:** `application/json`
+- **Body:** JSON object with top-3 species predictions, e.g.,
+```json
+{
+  "predictions": [
+    {"species": "Cardinalis cardinalis", "score": 0.987},
+    {"species": "Pica pica", "score": 0.005},
+    {"species": "Corvus corax", "score": 0.003}
+  ]
+}
+```
+
+### 2. `/score` — Aesthetic Scoring
+
+**Description:**
+Predicts the aesthetic score of an image on a scale from 0 to 10 using the LAION regression head.
+
+**Request:**
+- **Method:** `POST`
+- **Content-Type:** `image/jpeg` or `image/png`
+- **Body:** Raw image bytes
+
+**Example (using curl):**
+```sh
+curl -X POST -H "Content-Type: image/jpeg" --data-binary @your_image.jpg http://localhost:8080/score
+```
+
+**Response:**
+- **Status:** 200 OK
+- **Content-Type:** `application/json`
+- **Body:** JSON object with the aesthetic score, e.g.,
+```json
+{
+  "score": 7.5
+}
+```
+
+---
 
 ## Local setup
 
diff --git a/apps/api/aesthetic.py b/apps/api/aesthetic.py
@@ -2,7 +2,6 @@
 import torch
 import torch.nn as nn
 import torchvision.transforms as T
-import open_clip
 from PIL import Image
 import logging
 from collections import OrderedDict
@@ -15,7 +14,6 @@
 logger.setLevel(logging.DEBUG)
 
 HEAD_PATH = "models/aesthetic/sa_0_4_vit_b_16_linear.pth"
-CHECKPOINT_PATH = "models/openai_CLIP-ViT-L-16/open_clip_pytorch_model.bin"
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -78,6 +76,7 @@ async def score_aesthetic(req: Request) -> float:
     image_tensor = preprocess(img).unsqueeze(0)
     image_features = _clip_model.encode_image(image_tensor)
     image_features /= image_features.norm(dim=-1, keepdim=True)
-    score = regression_head(image_features).item()
+    score_tensor = regression_head(image_features)
+    score = score_tensor.item()
 
   return float(score)
diff --git a/apps/api/requirements.txt b/apps/api/requirements.txt
@@ -1,9 +1,9 @@
 fastapi==0.115.12
-open_clip_torch==2.24.0
+git+https://github.com/openai/CLIP.git
+numpy==1.26.4
 pillow==11.2.1
 scikit-learn==1.5.0
 timm==1.0.15
-torch==2.7.1
-torchvision==0.22.1
+torch==2.0.1
+torchvision==0.15.2
 uvicorn==0.34.3
-git+https://github.com/openai/CLIP.git
diff --git a/src/lib/__tests__/rename-moveRaw.vitest.ts b/src/lib/__tests__/rename-moveRaw.vitest.ts
@@ -22,22 +22,24 @@ describe('moveRaws function', () => {
   beforeEach(() => {
     vi.resetAllMocks()
     originalPath = '/'
-    filesOnDisk = ['image.heic', 'photo.heif', 'document.txt']
+    filesOnDisk = ['image.heic', 'photo.heif', 'clip.raw', 'movie.mov', 'document.txt', 'photo.jpg']
     errors = []
     formatErrorMessage = vi.fn((err, msg) => `${msg}: ${err.message}`)
   })
 
 
-  test("should create 'raws' folder and move HEIC/HEIF files", async () => {
+  test("should create 'raws' folder and move all configured raw files", async () => {
     vi.mocked(fs.mkdir).mockResolvedValue(undefined)
     vi.mocked(fs.rename).mockResolvedValue(undefined)
 
     await moveRaws({ originalPath, filesOnDisk, errors, formatErrorMessage })
 
     expect(fs.mkdir).toHaveBeenCalledWith(path.join(originalPath, 'raws'), { recursive: true })
-    expect(fs.rename).toHaveBeenCalledTimes(2)
+    expect(fs.rename).toHaveBeenCalledTimes(4)
     expect(fs.rename).toHaveBeenCalledWith(path.join(originalPath, 'image.heic'), path.join(originalPath, 'raws/image.heic'))
     expect(fs.rename).toHaveBeenCalledWith(path.join(originalPath, 'photo.heif'), path.join(originalPath, 'raws/photo.heif'))
+    expect(fs.rename).toHaveBeenCalledWith(path.join(originalPath, 'clip.raw'), path.join(originalPath, 'raws/clip.raw'))
+    expect(fs.rename).toHaveBeenCalledWith(path.join(originalPath, 'movie.mov'), path.join(originalPath, 'raws/movie.mov'))
     expect(errors).toHaveLength(0)
   })
 
@@ -47,16 +49,17 @@ describe('moveRaws function', () => {
 
     await moveRaws({ originalPath, filesOnDisk, errors, formatErrorMessage })
 
-    expect(errors).toHaveLength(2)
-    expect(formatErrorMessage).toHaveBeenCalledTimes(2)
-    expect(errors[0]).toContain('Error moving HEIF file: image.heic')
+    // Only raw files should trigger errors
+    expect(errors).toHaveLength(4)
+    expect(formatErrorMessage).toHaveBeenCalledTimes(4)
+    expect(errors[0]).toContain('Error moving raw file: image.heic')
   })
 
-  test('should not move non-HEIF files', async () => {
+  test('should not move files not in config', async () => {
     vi.mocked(fs.mkdir).mockResolvedValue(undefined)
     vi.mocked(fs.rename).mockResolvedValue(undefined)
 
-    filesOnDisk = ['document.txt', 'photo.jpg'] // No HEIC/HEIF files
+    filesOnDisk = ['document.txt', 'photo.jpg'] // No raw files
 
     await moveRaws({ originalPath, filesOnDisk, errors, formatErrorMessage })
 
diff --git a/src/lib/rename.ts b/src/lib/rename.ts
@@ -6,6 +6,7 @@ import { validateRequestBody, type RequestSchema } from '../models/rename'
 import checkPathExists from './exists'
 import { futureFilenamesOutputs } from './filenames'
 import { type ErrorFormatter } from './resize'
+import config from '../models/config'
 
 type ResponseBody = {
   renamed: boolean;
@@ -125,16 +126,26 @@ async function moveRaws(
   const rawsPath = path.join(path.dirname(originalPath), 'raws')
   await fs.mkdir(rawsPath, { recursive: true })
 
+  // Collect all raw extensions: built-in HEIC/HEIF plus the photo/video types from config (lowercase, with dot)
+  const rawExtensions = new Set(
+    [
+      ...['heic', 'heif'],
+      ...(config.rawFileTypes?.photo ?? []),
+      ...(config.rawFileTypes?.video ?? []),
+    ].map(ext => `.${ext.toLowerCase()}`)
+  )
+
   for (const file of filesOnDisk) {
-    if (file.toLowerCase().endsWith('.heic') || file.toLowerCase().endsWith('.heif')) {
+    const ext = path.extname(file).toLowerCase()
+    if (rawExtensions.has(ext)) {
       const sourceFile = path.join(originalPath, file)
       const destinationFile = path.join(rawsPath, file)
 
       try {
         await fs.rename(sourceFile, destinationFile) // Move file
         console.log(`Moved: ${file} → raws`)
       } catch (err) {
-        errors.push(formatErrorMessage(err, `Error moving HEIF file: ${file}`))
+        errors.push(formatErrorMessage(err, `Error moving raw file: ${file}`))
       }
     }
   }