Skip to content

Commit 677e190

Browse files
committed
AI: Adjust Pigo parameters for improved Face Detection photoprism#5167
Signed-off-by: Michael Mayer <michael@photoprism.app>
1 parent ea693e9 commit 677e190

File tree

6 files changed

+152
-69
lines changed

6 files changed

+152
-69
lines changed

internal/ai/face/detector.go

Lines changed: 90 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -208,35 +208,45 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
208208
results = make(Faces, 0, len(det))
209209

210210
for _, face := range det {
211-
// Skip result if quality is too low.
212-
if face.Q < QualityThreshold(face.Scale) {
213-
continue
211+
score := face.Q
212+
scale := face.Scale
213+
requiredScore := QualityThreshold(scale)
214+
scaleMin := LandmarkQualityScaleMin
215+
scaleMax := LandmarkQualityScaleMax
216+
fallbackCandidate := false
217+
if !findLandmarks && score < requiredScore && score >= LandmarkQualityFloor && scale >= scaleMin && scale <= scaleMax && requiredScore-score <= LandmarkQualitySlack {
218+
fallbackCandidate = true
214219
}
215220

216221
faceCoord := NewArea(
217222
"face",
218223
face.Row,
219224
face.Col,
220-
face.Scale,
225+
scale,
221226
)
222227

223228
var eyesCoords []Area
224229
var landmarkCoords []Area
230+
var eyesFound bool
231+
232+
needLandmarks := (findLandmarks || fallbackCandidate) && scale > 50
225233

226-
if findLandmarks && face.Scale > 50 {
227-
eyesCoords = make([]Area, 0, 2)
234+
if needLandmarks {
235+
if findLandmarks {
236+
eyesCoords = make([]Area, 0, 2)
237+
}
228238

229-
scale := float32(face.Scale)
239+
scaleF := float32(scale)
230240
leftCandidate := pigo.Puploc{
231-
Row: face.Row - int(0.075*scale),
232-
Col: face.Col - int(0.175*scale),
233-
Scale: scale * 0.25,
241+
Row: face.Row - int(0.075*scaleF),
242+
Col: face.Col - int(0.175*scaleF),
243+
Scale: scaleF * 0.25,
234244
Perturbs: d.perturb,
235245
}
236246

237247
leftEye := plc.RunDetector(leftCandidate, params.ImageParams, d.landmarkAngle, false)
238248
leftEyeFound := leftEye.Row > 0 && leftEye.Col > 0
239-
if leftEyeFound {
249+
if leftEyeFound && findLandmarks {
240250
eyesCoords = append(eyesCoords, NewArea(
241251
"eye_l",
242252
leftEye.Row,
@@ -246,15 +256,15 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
246256
}
247257

248258
rightCandidate := pigo.Puploc{
249-
Row: face.Row - int(0.075*scale),
250-
Col: face.Col + int(0.185*scale),
251-
Scale: scale * 0.25,
259+
Row: face.Row - int(0.075*scaleF),
260+
Col: face.Col + int(0.185*scaleF),
261+
Scale: scaleF * 0.25,
252262
Perturbs: d.perturb,
253263
}
254264

255265
rightEye := plc.RunDetector(rightCandidate, params.ImageParams, d.landmarkAngle, false)
256266
rightEyeFound := rightEye.Row > 0 && rightEye.Col > 0
257-
if rightEyeFound {
267+
if rightEyeFound && findLandmarks {
258268
eyesCoords = append(eyesCoords, NewArea(
259269
"eye_r",
260270
rightEye.Row,
@@ -264,71 +274,83 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
264274
}
265275

266276
if leftEyeFound && rightEyeFound {
267-
landmarkCapacity := len(eyeCascades)*2 + len(mouthCascades) + 1
268-
landmarkCoords = make([]Area, 0, landmarkCapacity)
269-
270-
for _, eye := range eyeCascades {
271-
for _, flpc := range flpcs[eye] {
272-
if flpc == nil {
273-
continue
274-
}
275-
276-
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, false)
277-
if flp.Row > 0 && flp.Col > 0 {
278-
landmarkCoords = append(landmarkCoords, NewArea(
279-
eye,
280-
flp.Row,
281-
flp.Col,
282-
int(flp.Scale),
283-
))
284-
}
285-
286-
flp = flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, true)
287-
if flp.Row > 0 && flp.Col > 0 {
288-
landmarkCoords = append(landmarkCoords, NewArea(
289-
eye+"_v",
290-
flp.Row,
291-
flp.Col,
292-
int(flp.Scale),
293-
))
277+
eyesFound = true
278+
279+
if findLandmarks {
280+
landmarkCapacity := len(eyeCascades)*2 + len(mouthCascades) + 1
281+
landmarkCoords = make([]Area, 0, landmarkCapacity)
282+
283+
for _, eye := range eyeCascades {
284+
for _, flpc := range flpcs[eye] {
285+
if flpc == nil {
286+
continue
287+
}
288+
289+
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, false)
290+
if flp.Row > 0 && flp.Col > 0 {
291+
landmarkCoords = append(landmarkCoords, NewArea(
292+
eye,
293+
flp.Row,
294+
flp.Col,
295+
int(flp.Scale),
296+
))
297+
}
298+
299+
flp = flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, true)
300+
if flp.Row > 0 && flp.Col > 0 {
301+
landmarkCoords = append(landmarkCoords, NewArea(
302+
eye+"_v",
303+
flp.Row,
304+
flp.Col,
305+
int(flp.Scale),
306+
))
307+
}
294308
}
295309
}
296-
}
297310

298-
for _, mouth := range mouthCascades {
299-
for _, flpc := range flpcs[mouth] {
300-
if flpc == nil {
301-
continue
302-
}
303-
304-
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, false)
305-
if flp.Row > 0 && flp.Col > 0 {
306-
landmarkCoords = append(landmarkCoords, NewArea(
307-
"mouth_"+mouth,
308-
flp.Row,
309-
flp.Col,
310-
int(flp.Scale),
311-
))
311+
for _, mouth := range mouthCascades {
312+
for _, flpc := range flpcs[mouth] {
313+
if flpc == nil {
314+
continue
315+
}
316+
317+
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, false)
318+
if flp.Row > 0 && flp.Col > 0 {
319+
landmarkCoords = append(landmarkCoords, NewArea(
320+
"mouth_"+mouth,
321+
flp.Row,
322+
flp.Col,
323+
int(flp.Scale),
324+
))
325+
}
312326
}
313327
}
314-
}
315328

316-
if cascades := flpcs["lp84"]; len(cascades) > 0 {
317-
if flpc := cascades[0]; flpc != nil {
318-
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, true)
319-
if flp.Row > 0 && flp.Col > 0 {
320-
landmarkCoords = append(landmarkCoords, NewArea(
321-
"lp84",
322-
flp.Row,
323-
flp.Col,
324-
int(flp.Scale),
325-
))
329+
if cascades := flpcs["lp84"]; len(cascades) > 0 {
330+
if flpc := cascades[0]; flpc != nil {
331+
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, true)
332+
if flp.Row > 0 && flp.Col > 0 {
333+
landmarkCoords = append(landmarkCoords, NewArea(
334+
"lp84",
335+
flp.Row,
336+
flp.Col,
337+
int(flp.Scale),
338+
))
339+
}
326340
}
327341
}
328342
}
329343
}
330344
}
331345

346+
if eyesFound && fallbackCandidate && requiredScore > LandmarkQualityFloor {
347+
requiredScore = LandmarkQualityFloor
348+
}
349+
350+
if score < requiredScore {
351+
continue
352+
}
353+
332354
// Create face.
333355
f := Face{
334356
Rows: params.ImageParams.Rows,

internal/ai/face/detector_test.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/photoprism/photoprism/pkg/fs/fastwalk"
1515
)
1616

17+
var benchmarkFacesCount int
18+
1719
func TestDetect(t *testing.T) {
1820
expected := map[string]int{
1921
"1.jpg": 1,
@@ -152,7 +154,23 @@ func TestDetectLandmarkCounts(t *testing.T) {
152154
}
153155
}
154156

155-
var benchmarkFacesCount int
157+
// TestDetectQualityFallback expects Detect to return at least one face whose
// Score is below the truncated QualityThreshold for its scale.
// It is currently skipped unconditionally, so none of the assertions run.
func TestDetectQualityFallback(t *testing.T) {
158+
// NOTE(review): the sample path below looks like a placeholder rather than an
// existing test image, which is presumably why the test is skipped — confirm.
t.SkipNow()
159+
faces, err := Detect("testdata/<no public test image available>.jpg", false, 20)
160+
require.NoError(t, err)
161+
require.NotEmpty(t, faces)
162+
163+
found := false
164+
165+
// Look for any face that was accepted even though its score is below the
// regular quality threshold for its scale.
for _, face := range faces {
166+
if face.Score < int(QualityThreshold(face.Area.Scale)) {
167+
found = true
168+
break
169+
}
170+
}
171+
172+
require.Truef(t, found, "expected at least one face below the quality threshold, got %+v", faces)
173+
}
156174

157175
func BenchmarkDetectorFacesLandmarks(b *testing.B) {
158176
const sample = "testdata/18.jpg"

internal/ai/face/faces.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,17 @@ func (faces Faces) Count() int {
2626
return len(faces)
2727
}
2828

29+
// MaxScale returns the largest face scale in pixels, or 0 if faces is empty or no scale is positive.
30+
func (faces Faces) MaxScale() (max int) {
31+
for _, f := range faces {
32+
if f.Area.Scale > max {
33+
max = f.Area.Scale
34+
}
35+
}
36+
37+
return max
38+
}
39+
2940
// Uncertainty returns the max face detection uncertainty in percent.
3041
func (faces Faces) Uncertainty() int {
3142
if len(faces) < 1 {

internal/ai/face/thresholds.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ var CropSize = crop.Sizes[crop.Tile160] // Face image crop size for Fac
88
var OverlapThreshold = 42 // Face area overlap threshold in percent.
99
var OverlapThresholdFloor = OverlapThreshold - 1 // Reduced overlap area to avoid rounding inconsistencies.
1010
var ScoreThreshold = 9.0 // Min face score.
11+
var LandmarkQualityFloor = float32(5.0) // Min face score (inclusive) accepted by the landmark-based quality fallback when both eyes are located.
12+
var LandmarkQualityScaleMin = 60 // Min face size (inclusive) eligible for landmark-based quality fallback.
13+
var LandmarkQualityScaleMax = 90 // Max face size (inclusive) eligible for landmark-based quality fallback.
14+
var LandmarkQualitySlack = float32(4.0) // Max allowed gap (inclusive) between the required quality threshold and the face score.
1115
var ClusterScoreThreshold = 15 // Min score for faces forming a cluster.
1216
var SizeThreshold = 50 // Min face size in pixels.
1317
var ClusterSizeThreshold = 80 // Min size for faces forming a cluster in pixels.

internal/entity/src.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ const (
3333
SrcLocation Src = classify.SrcLocation // Prio 8
3434
SrcMarker Src = "marker" // Prio 8
3535
SrcImage Src = classify.SrcImage // Prio 8
36+
SrcONNX Src = "onnx"
3637
SrcOllama Src = "ollama"
3738
SrcOpenAI Src = "openai"
3839
SrcTitle Src = classify.SrcTitle // Prio 16
@@ -69,6 +70,7 @@ var SrcPriority = Priorities{
6970
SrcLocation: 8,
7071
SrcMarker: 8,
7172
SrcImage: 8,
73+
SrcONNX: 16,
7274
SrcOllama: 16,
7375
SrcOpenAI: 16,
7476
SrcTitle: 16,
@@ -117,6 +119,7 @@ var SrcDesc = map[Src]string{
117119
SrcLocation: "GPS Position",
118120
SrcMarker: "Object Detection",
119121
SrcImage: "Computer Vision (default)",
122+
SrcONNX: "Computer Vision (ONNX)",
120123
SrcOllama: "Computer Vision (Ollama)",
121124
SrcOpenAI: "Computer Vision (OpenAI)",
122125
SrcTitle: "Picture Title",

internal/photoprism/index_faces.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,31 @@ func (ind *Index) Faces(jpeg *MediaFile, expected int) face.Faces {
4646
log.Debugf("vision: %s in %s (detect faces)", err, clean.Log(jpeg.BaseName()))
4747
}
4848

49+
if thumbSize != thumb.Fit1280 {
50+
needRetry := len(faces) == 0
51+
52+
if !needRetry && expected > 0 && len(faces) < expected {
53+
needRetry = true
54+
}
55+
56+
if !needRetry && len(faces) > 0 && faces.MaxScale() < 96 {
57+
needRetry = true
58+
}
59+
60+
if needRetry {
61+
if altThumb, altErr := jpeg.Thumbnail(Config().ThumbCachePath(), thumb.Fit1280); altErr != nil {
62+
log.Debugf("vision: %s in %s (detect faces @1280)", altErr, clean.Log(jpeg.BaseName()))
63+
} else if altThumb == "" {
64+
log.Debugf("vision: thumb %s not found in %s (detect faces @1280)", thumb.Fit1280, clean.Log(jpeg.BaseName()))
65+
} else if retryFaces, retryErr := vision.Faces(altThumb, Config().FaceSize(), true, expected); retryErr != nil {
66+
log.Debugf("vision: %s in %s (detect faces @1280)", retryErr, clean.Log(jpeg.BaseName()))
67+
} else if len(retryFaces) > 0 {
68+
log.Debugf("vision: retry face detection for %s using %s", clean.Log(jpeg.BaseName()), thumb.Fit1280)
69+
faces = retryFaces
70+
}
71+
}
72+
}
73+
4974
if l := len(faces); l > 0 {
5075
log.Infof("vision: found %s in %s [%s]", english.Plural(l, "face", "faces"), clean.Log(jpeg.BaseName()), time.Since(start))
5176
}

0 commit comments

Comments
 (0)