Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,22 @@ load-weights:
-e MODEL_REPO="$(MODEL_REPO)" \
-e FILENAMES="$(FILENAMES)" \
weights-loader \
sh -c 'python /dock/hugging-offline.py --repo-id "$$MODEL_REPO" --filenames "$$FILENAMES" > /dock/build.log 2>&1'
sh -c 'python /dock/hugging-offline.py --repo-id "$$MODEL_REPO" --filenames $$FILENAMES > /dock/build.log 2>&1'
@echo "Part 3/3 Copying weights to local directory and display log"
docker cp extract-model:/dock/models/. ./models/
docker cp extract-model:/dock/build.log ./weights.log || true
docker rm extract-model
cat ./weights.log || true

# Fetch the multi-attribute aesthetic scorer weights (rsinema/aesthetic-scorer)
# plus its tokenizer/processor files. Pulls the CLIP ViT-B/32 backbone first,
# since the scorer is loaded on top of that backbone (see apps/api/aesthetic.py).
load-aesthetic-scorer:
	$(MAKE) load-clip-vit-base-patch32
	$(MAKE) load-weights MODEL_REPO="rsinema/aesthetic-scorer" \
		FILENAMES="model.pt preprocessor_config.json tokenizer.json tokenizer_config.json special_tokens_map.json merges.txt vocab.json"

# Fetch the OpenAI CLIP ViT-B/32 backbone weights and processor files so the
# API can load them with local_files_only=True (fully offline).
load-clip-vit-base-patch32:
	$(MAKE) load-weights MODEL_REPO="openai/clip-vit-base-patch32" \
		FILENAMES="pytorch_model.bin config.json preprocessor_config.json tokenizer.json tokenizer_config.json special_tokens_map.json merges.txt vocab.json"

# Build the Python AI API image from apps/api/Dockerfile (build context: repo root).
build-ai-api:
	docker build -f apps/api/Dockerfile -t ai-api .

Expand Down
28 changes: 1 addition & 27 deletions app/[gallery]/persons/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,16 @@ import { Suspense } from 'react'
import type { Metadata } from 'next'

import PersonsClient from '../../../src/components/Persons/PersonsClient'
import { buildAgeSummary } from '../../../src/utils/person-age'
import getGalleries from '../../../src/lib/galleries'
import { getPersonsData } from '../../../src/lib/persons'
import type { Gallery } from '../../../src/types/common'
import { generateClusters } from '../../../src/lib/generate-clusters'
import type { Item } from '../../../src/types/common'

type AgeSummary = {
  ages: { age: number; count: number }[];
};

/**
 * Build a histogram of person ages at photo time.
 *
 * The photo date is taken from the item's `photoDate` when it is a non-empty
 * string, otherwise from the first 10 characters (YYYY-MM-DD) of the filename.
 * Each person with a `dob` contributes one count at their age on that date;
 * unparsable dates and negative ages are skipped.
 */
function buildAgeSummary(items: Item[]): AgeSummary {
  const counts = new Map<number, number>();
  items.forEach(it => {
    if (!it.persons || !it.filename) return;
    const filenameDate = Array.isArray(it.filename)
      ? (it.filename[0] ?? '').substring(0, 10)
      : String(it.filename).substring(0, 10);
    // Only trust photoDate when it is actually a string — the previous
    // `(it as any).photoDate` access threw on truthy non-string values
    // (e.g. a numeric timestamp has no .substring).
    const rawPhotoDate = (it as { photoDate?: unknown }).photoDate;
    const photoDate =
      typeof rawPhotoDate === 'string' && rawPhotoDate ? rawPhotoDate : filenameDate;
    it.persons.forEach(p => {
      if (!p.dob) return;
      const birth = new Date(p.dob.substring(0, 10));
      const shot = new Date(photoDate.substring(0, 10));
      if (Number.isNaN(birth.getTime()) || Number.isNaN(shot.getTime())) return;
      // Calendar age: subtract one year if the birthday hasn't occurred yet.
      let age = shot.getFullYear() - birth.getFullYear();
      const m = shot.getMonth() - birth.getMonth();
      if (m < 0 || (m === 0 && shot.getDate() < birth.getDate())) age -= 1;
      if (age >= 0) counts.set(age, (counts.get(age) || 0) + 1);
    });
  });
  return {
    ages: Array.from(counts.entries())
      .map(([age, count]) => ({ age, count }))
      .sort((a, b) => a.age - b.age),
  };
}

// Static page metadata consumed by the Next.js App Router (sets the <title>).
export const metadata: Metadata = {
  title: 'Persons - History App',
}
Expand Down
6 changes: 5 additions & 1 deletion app/admin/album/page.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import dynamic from 'next/dynamic'
import type { Metadata } from 'next'

import AdminAlbumClient from '../../../src/components/AdminAlbum/AdminAlbumClient'
const AdminAlbumClient = dynamic(
() => import('../../../src/components/AdminAlbum/AdminAlbumClient'),
{ ssr: true },
)
import getAlbums from '../../../src/lib/albums'
import config from '../../../src/models/config'
import { type Gallery } from '../../../src/types/common'
Expand Down
6 changes: 5 additions & 1 deletion app/admin/walk/[...path]/page.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import dynamic from 'next/dynamic'
import type { Metadata } from 'next'

import WalkClient from '../../../../src/components/Walk/WalkClient'
const WalkClient = dynamic(
() => import('../../../../src/components/Walk/WalkClient'),
{ ssr: true },
)
import getFilesystems from '../../../../src/lib/filesystems'
import type { Walk } from '../../../../src/types/pages'

Expand Down
6 changes: 5 additions & 1 deletion app/admin/walk/page.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import dynamic from 'next/dynamic'
import type { Metadata } from 'next'

import WalkClient from '../../../src/components/Walk/WalkClient'
const WalkClient = dynamic(
() => import('../../../src/components/Walk/WalkClient'),
{ ssr: true },
)
import getFilesystems from '../../../src/lib/filesystems'

export const metadata: Metadata = {
Expand Down
48 changes: 48 additions & 0 deletions apps/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Apps Quick Start (Python API)

This folder contains the Python API used by the app.

## Routes you likely want

- `POST /scores`
- `POST /classify`

Both accept raw image bytes (`image/jpeg` or `image/png`).

## Start the API (recommended via Docker)

From repo root:

```sh
make build-ai-api
make ai-api
```

API runs at:

```txt
http://localhost:8080
```

## Test the two routes

Use any local image file (example: `public/sample.jpg`).

```sh
curl -X POST -H "Content-Type: image/jpeg" --data-binary @public/sample.jpg http://localhost:8080/scores
```

```sh
curl -X POST -H "Content-Type: image/jpeg" --data-binary @public/sample.jpg http://localhost:8080/classify
```

## Optional: run without Docker

```sh
cd apps/api
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
uvicorn main:main_py_app --host 0.0.0.0 --port 8080
```

13 changes: 13 additions & 0 deletions apps/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,16 @@ curl -X POST -H "Content-Type: image/jpeg" --data-binary @your_image.jpg http://
[sa_0_4_vit_b_16_linear.pth](https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_vit_b_16_linear.pth)
1. Place it in `models/aesthetic/sa_0_4_vit_b_16_linear.pth`
1. The OpenAI CLIP backbone weights for ViT-B/16 will be downloaded automatically on the first run of `make ai-api`

## Aesthetic scorer (multi-attribute)

To enable the newer multi-attribute aesthetic scorer (used by `/scores`):

1. Download the model weights and processor files:
`make load-aesthetic-scorer`
1. (Optional — `make load-aesthetic-scorer` already runs this step for you.) Download the CLIP ViT-B/32 backbone on its own (offline):
`make load-clip-vit-base-patch32`
1. Rebuild and run the API:
`make build-ai-api && make ai-api`

The weights are stored under `models/rsinema_aesthetic-scorer` and `models/openai_clip-vit-base-patch32` for offline loading.
168 changes: 163 additions & 5 deletions apps/api/aesthetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,23 @@
import torch
import torch.nn as nn
import torchvision.transforms as T
from PIL import Image
from PIL import Image, ImageFilter
import numpy as np
import logging
from collections import OrderedDict
import io
import clip
from transformers import CLIPProcessor, CLIPVisionModel

# Set up logging once
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("uvicorn")
logger.setLevel(logging.DEBUG)

HEAD_PATH = "models/aesthetic/sa_0_4_vit_b_16_linear.pth"
SCORER_DIR = "models/rsinema_aesthetic-scorer"
SCORER_MODEL_PATH = f"{SCORER_DIR}/model.pt"
CLIP_BASE_DIR = "models/openai_clip-vit-base-patch32"

device = "cuda" if torch.cuda.is_available() else "cpu"

Expand Down Expand Up @@ -65,12 +70,74 @@ def load_clip_model() -> tuple[torch.nn.Module, callable]:
return model, preprocess

# One-time global setup
_clip_model, preprocess = load_clip_model()
regression_head = load_aesthetic_head(HEAD_PATH)
_clip_model = None
preprocess = None
regression_head = None
_aesthetic_scorer = None
_aesthetic_processor = None
_aesthetic_backbone = None

class AestheticScorer(nn.Module):
    """Multi-attribute aesthetic model: a CLIP vision backbone whose pooled
    output feeds seven independent single-unit linear heads, one per rated
    attribute.
    """

    # Order matters: it fixes both the state_dict key names and the order of
    # the tuple returned by forward().
    _HEAD_NAMES = (
        "aesthetic",
        "quality",
        "composition",
        "light",
        "color",
        "dof",
        "content",
    )

    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone
        # assumes backbone exposes a HF-style .config.hidden_size — TODO confirm
        # against the CLIPVisionModel used by the loader.
        hidden_dim = backbone.config.hidden_size
        for head in self._HEAD_NAMES:
            setattr(self, f"{head}_head", nn.Sequential(nn.Linear(hidden_dim, 1)))

    def forward(self, pixel_values):
        """Return a 7-tuple of (batch, 1) score tensors, one per head."""
        pooled = self.backbone(pixel_values).pooler_output
        return tuple(getattr(self, f"{name}_head")(pooled) for name in self._HEAD_NAMES)

def load_aesthetic_scorer():
    """Load the multi-attribute aesthetic scorer into module globals.

    Best-effort: on any failure the three globals are reset to None and the
    /scores endpoint falls back to heuristic-only scoring (see
    _score_with_aesthetic_model, which treats None as "model unavailable").
    """
    global _aesthetic_scorer, _aesthetic_processor, _aesthetic_backbone
    try:
        # local_files_only=True: the weights must already be on disk; they are
        # fetched by the Makefile targets load-aesthetic-scorer and
        # load-clip-vit-base-patch32.
        _aesthetic_processor = CLIPProcessor.from_pretrained(SCORER_DIR, use_fast=False, local_files_only=True)
        _aesthetic_backbone = CLIPVisionModel.from_pretrained(CLIP_BASE_DIR, local_files_only=True).to(device)
        loaded = torch.load(SCORER_MODEL_PATH, map_location=device)
        # model.pt may be either a bare state_dict (a dict of tensors) or a
        # fully pickled model object; support both formats.
        if isinstance(loaded, dict) and all(isinstance(v, torch.Tensor) for v in loaded.values()):
            scorer = AestheticScorer(_aesthetic_backbone)
            # strict=False: tolerate extra/missing keys in the checkpoint.
            scorer.load_state_dict(loaded, strict=False)
            _aesthetic_scorer = scorer
        else:
            _aesthetic_scorer = loaded
        _aesthetic_scorer.eval()
        logger.info("✅ Aesthetic scorer loaded.")
    except Exception as e:
        # Deliberate best-effort: keep the API serving without this model.
        logger.error(f"⚠️ Failed to load aesthetic scorer: {e}")
        _aesthetic_scorer = None
        _aesthetic_processor = None
        _aesthetic_backbone = None

# Attempted eagerly at import time; failure leaves the globals as None.
load_aesthetic_scorer()

def ensure_legacy_aesthetic_loaded():
    """Lazily initialise the legacy CLIP model + linear regression head.

    Deferred to first use so importing this module does not pay the CLIP
    load cost when only the newer multi-attribute scorer is exercised.
    Idempotent: already-loaded globals are left untouched.
    """
    global _clip_model, preprocess, regression_head
    if _clip_model is None or preprocess is None:
        _clip_model, preprocess = load_clip_model()
    if regression_head is None:
        regression_head = load_aesthetic_head(HEAD_PATH)

async def score_aesthetic(req: Request) -> float:
img_bytes = await req.body()
img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
ensure_legacy_aesthetic_loaded()
if isinstance(req, Request):
img_bytes = await req.body()
img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
else:
img = req.convert("RGB")

with torch.no_grad():
image_tensor = preprocess(img).unsqueeze(0)
Expand All @@ -80,3 +147,94 @@ async def score_aesthetic(req: Request) -> float:
score = score_tensor.item()

return float(score)

def _grayscale_np(img: Image.Image, size: int = 256) -> np.ndarray:
    """Return ``img`` as a ``size``x``size`` grayscale float32 array in [0, 1]."""
    small = img.resize((size, size))
    luma = small.convert("L")
    return np.array(luma, dtype=np.float32) / 255.0

def _edges_intensity(img: Image.Image, size: int = 256) -> np.ndarray:
    """Edge-strength map in [0, 1]: FIND_EDGES on a ``size``x``size`` copy,
    converted to grayscale float32."""
    small = img.resize((size, size))
    edge_img = small.filter(ImageFilter.FIND_EDGES).convert("L")
    return np.array(edge_img, dtype=np.float32) / 255.0

def _rule_of_thirds_score(edge_map: np.ndarray) -> float:
if edge_map.size == 0:
return 0.0
h, w = edge_map.shape
ys = np.linspace(0, h - 1, h, dtype=np.float32)
xs = np.linspace(0, w - 1, w, dtype=np.float32)
yy, xx = np.meshgrid(ys, xs, indexing="ij")
thirds_y = np.array([h / 3, 2 * h / 3], dtype=np.float32)
thirds_x = np.array([w / 3, 2 * w / 3], dtype=np.float32)
sigma = min(h, w) / 12
weight = np.zeros_like(edge_map, dtype=np.float32)
for ty in thirds_y:
for tx in thirds_x:
weight += np.exp(-(((yy - ty) ** 2 + (xx - tx) ** 2) / (2 * sigma ** 2)))
weighted = float((edge_map * weight).sum())
total = float(edge_map.sum())
if total <= 0:
return 0.0
ratio = weighted / total
return float(max(0.0, min(10.0, ratio * 10)))

def _visual_interest_score(edge_map: np.ndarray) -> float:
mean_edge = float(edge_map.mean())
score = mean_edge * 60.0
return float(max(0.0, min(10.0, score)))

def _sharpness_score(gray: np.ndarray) -> float:
if gray.size == 0:
return 0.0
padded = np.pad(gray, 1, mode="edge")
lap = (
padded[:-2, 1:-1]
+ padded[2:, 1:-1]
+ padded[1:-1, :-2]
+ padded[1:-1, 2:]
- 4 * padded[1:-1, 1:-1]
)
variance = float(lap.var())
score = variance * 1000.0
return float(max(0.0, min(10.0, score)))

def _score_with_aesthetic_model(img: Image.Image) -> dict | None:
    """Run the multi-attribute scorer on ``img``.

    Returns a dict of the seven attribute scores as floats, or None when the
    model or its processor failed to load (callers treat None as
    "heuristics only").
    """
    if _aesthetic_scorer is None or _aesthetic_processor is None:
        return None
    inputs = _aesthetic_processor(images=img, return_tensors="pt")["pixel_values"].to(device)
    with torch.no_grad():
        scores = _aesthetic_scorer(inputs)
    # Label order must match the head order in AestheticScorer.forward().
    labels = ["overall", "quality", "composition", "lighting", "color", "depth_of_field", "content"]
    return {label: float(score.item()) for label, score in zip(labels, scores)}

async def score_photo_tips(req: Request) -> dict:
    """Score a photo (raw image bytes in the request body).

    Combines cheap heuristics (rule of thirds, edge density, sharpness) with
    the multi-attribute model when it is available, and returns the
    sub-scores, the raw model scores (or None), and human-readable tips.
    """
    body = await req.body()
    photo = Image.open(io.BytesIO(body)).convert("RGB")

    edges = _edges_intensity(photo)
    luma = _grayscale_np(photo)
    thirds_score = _rule_of_thirds_score(edges)
    interest_score = _visual_interest_score(edges)
    sharpness_score = _sharpness_score(luma)

    # Heuristic composition blend; sharpness scales the final score by 0.9-1.4x.
    composition = (interest_score * 0.8) + (thirds_score * 0.2)
    sharpness_factor = 0.9 + (sharpness_score / 20.0)

    model_scores = _score_with_aesthetic_model(photo)
    if model_scores:
        model_overall = model_scores["overall"] * 2
        base_overall = (model_overall * 0.7) + (composition * 0.3)
    else:
        base_overall = composition
    overall_score = base_overall * sharpness_factor

    tips = []
    if thirds_score < 4:
        tips.append("Try placing the subject near rule-of-thirds intersections.")
    if interest_score < 4:
        tips.append("Add more texture, contrast, or a clearer subject to increase visual interest.")
    if sharpness_score < 4:
        tips.append("Looks a bit soft; try a faster shutter or steadier shot.")
    if not tips:
        tips.append("Strong composition and visual interest.")

    return {
        "rule_of_thirds_score": round(thirds_score, 2),
        "visual_interest_score": round(interest_score, 2),
        "sharpness_score": round(sharpness_score, 2),
        # Overall is clamped to 0-10 then reported on a 0-100 scale.
        "overall_score": round(max(0.0, min(10.0, overall_score)) * 10, 1),
        "model_scores": model_scores,
        "tips": tips,
    }
10 changes: 8 additions & 2 deletions apps/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import sys
import traceback
from aesthetic import score_aesthetic
from aesthetic import score_photo_tips
from classify import classify_image

# Setup logging once
Expand Down Expand Up @@ -38,6 +38,12 @@ async def classify_endpoint(req: Request):
@main_py_app.post("/scores")
async def score_endpoint(req: Request):
    """POST /scores — score the raw image bytes in the request body.

    Returns the photo-tips payload from score_photo_tips, mirroring
    overall_score into aesthetic_score for older callers. Errors are
    converted to the shared error_response payload.
    """
    try:
        # The stale `return {"aesthetic_score": await score_aesthetic(req)}`
        # line was removed: it made the code below unreachable and referenced
        # a name this module no longer imports.
        result = await score_photo_tips(req)
        # Backwards compatibility for callers/tests that still expect aesthetic_score.
        if isinstance(result, dict) and "aesthetic_score" not in result:
            score_value = result.get("overall_score")
            if isinstance(score_value, (int, float)):
                result["aesthetic_score"] = float(score_value)
        return result
    except Exception as e:
        return error_response(e)
1 change: 1 addition & 0 deletions apps/api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ scikit-learn==1.5.0
timm==1.0.15
torch==2.0.1
torchvision==0.15.2
transformers==4.38.2
uvicorn==0.34.3
Loading