-
Notifications
You must be signed in to change notification settings - Fork 137
Description
Summary of issue
Hi,
I'm working on a project that does face recognition in real-time using ar glass(nreal light).
'shape_predictor_68_face_landmarks.dat' and 'dlib_face_recognition_resnet_model_v1.dat' are loaded successfully, but there seems to be a problem in the part where the operator of detector is called.
I tried two ways to use the camera texture in dlibdotnet.
First, I used nreal light's rgbcamera to get texture2d and converted it to Matrix type, but FrontalFaceDetector.Operator() doesn't seem to be applied.
Second, I used data of type byte[] and the result was same.
So my point is
1. I want to know if real-time face recognition is possible.
2. I wonder if data in byte[] format or Unity's texture2d format is available on dlibdotnet.
Any advice about this issue would be appreciated!
Environment
The ar glass model is nreal light and I am working in unity environment.
Also, android phone model is samsung galaxy s22.
Code:
using DlibDotNet;
using NRKernal;
using UnityEngine;
using System;
using UnityEngine.UI;
...
public class SpeakerRecognition : MonoBehaviour {
public RawImage captureImage;
public Button activeButton;
private bool isActivated = false;
private FrontalFaceDetector detector;
private ShapePredictor sp;
private DlibDotNet.Dnn.LossMetric net;
private List<Matrix<float>> knownFaces = new List<Matrix<float>>();
private List<string> knownNames = new List<string>();
private List<float> faceDiffs = new List<float>();
private NRRGBCamTexture camTexture;
void Start()
{
detector = Dlib.GetFrontalFaceDetector();
sp = ShapePredictor.Deserialize(Application.dataPath + "/Plugins/shape_predictor_68_face_landmarks.dat");
net = DlibDotNet.Dnn.LossMetric.Deserialize(Application.dataPath + "/Plugins/dlib_face_recognition_resnet_model_v1.dat");
LoadKnownFaces();
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
activeButton.onClick.AddListener(ToggleActivation);
}
private IEnumerator RecognizeSpeaker() {
// while camera is activated
if(isActivated){
// convert texture2d to matrix
int width = camTexture.Width;
int height = camTexture.Height;
Matrix<RgbPixel> img = new Matrix<RgbPixel>(height, width);
Color32[] pixels = camTexture.GetTexture().GetPixels32();
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
Color32 pixel = pixels[y * width + x];
RgbPixel rgbPixel = new RgbPixel(pixel.r, pixel.g, pixel.b);
img[y, x] = rgbPixel;
}
}
var faces = new List<Matrix<RgbPixel>>();
var shapes = new List<FullObjectDetection>();
foreach(var face in detector.Operator(img))
{
var shape = sp.Detect(img, face);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
shapes.Add(shape);
faces.Add(faceChip);
}
var faceDescriptors = net.Operator(faces);
for(int i = 0; i < faceDescriptors.Count; i++)
{
//
// DrawFaceLandmarks(shapes[i]);
//
debugLog.text = "descripter " + i;
faceDiffs.Clear();
for(int j = 0; j < knownFaces.Count; j++)
{
var diff = Dlib.Length(knownFaces[j] - faceDescriptors[i]);
faceDiffs.Add(diff);
}
var index = faceDiffs.IndexOf(minDiff);
var matchRate = 1 / (1 + minDiff);
if(matchRate > 0.5)
{
...
}
}
}
yield return null;
}
public void LoadKnownFaces() {
debugLog.text = "load known faces";
string relativePath = "NRSDK/Demos/OverTheGlass/Scripts/faces";
string absolutePath = Application.dataPath + "/" + relativePath;
foreach (var file in System.IO.Directory.GetFiles(absolutePath, "*.jpg"))
{
var img = Dlib.LoadImageAsMatrix<RgbPixel>(file);
var faces = detector.Operator(img);
if (faces.Any())
{
// Detect face
var shape = sp.Detect(img, faces[0]);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
// Get face descriptor
var faceDescriptor = net.Operator(faceChip);
// Add known face and name informations
knownFaces.Add(faceDescriptor.First());
knownNames.Add(System.IO.Path.GetFileNameWithoutExtension(file));
}
else
{
debugLog.text = "No face found in " + file;
}
}
lipDiffs = new List<double>(knownFaces.Count);
return;
}
public void ToggleActivation()
{
isActivated = !isActivated;
if (isActivated)
{
if(camTexture == null)
{
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
}
camTexture.Play();
captureImage.texture = camTexture.GetTexture();
StartCoroutine(RecognizeSpeaker());
}
else
{
camTexture?.Stop();
camTexture = null;
StopCoroutine(RecognizeSpeaker());
}
}
void OnDestroy()
{
camTexture?.Stop();
camTexture = null;
}
}
using System.Collections.Generic;
using DlibDotNet;
using NRKernal;
using UnityEngine;
using System;
using System.Linq;
using UnityEngine.UI;
using System.Collections;
using OpenCvSharp;
using System.Runtime.InteropServices;
public class SpeakerRecognition : MonoBehaviour {
public Text debugLog;
public RawImage captureImage;
public Text speakerName;
public Button activeButton;
public GameObject spherePrefab;
private bool isActivated = false;
private FrontalFaceDetector detector;
private ShapePredictor sp;
private DlibDotNet.Dnn.LossMetric net;
private List<Matrix<float>> knownFaces = new List<Matrix<float>>();
private List<string> knownNames = new List<string>();
private List<LipMovement> movements = new List<LipMovement>();
private (double, double) latestSpeakerPosition;
private List<float> faceDiffs = new List<float>();
private List<double> lipDiffs = new List<double>();
private NRRGBCamTexture camTexture;
private Texture2D mTex2d;
void Start()
{
detector = Dlib.GetFrontalFaceDetector();
sp = ShapePredictor.Deserialize(Application.dataPath + "/Plugins/shape_predictor_68_face_landmarks.dat");
net = DlibDotNet.Dnn.LossMetric.Deserialize(Application.dataPath + "/Plugins/dlib_face_recognition_resnet_model_v1.dat");
LoadKnownFaces();
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
mTex2d = camTexture.GetTexture();
activeButton.onClick.AddListener(ToggleActivation);
}
private IEnumerator RecognizeSpeaker() {
// 카메라 작동 중일 때 계속 실행
while(isActivated){
if(!isActivated || camTexture == null)
{
break;
}
var temp = new Mat();
// Get raw data from Texture2D
var array = mTex2d.GetRawTextureData();
Marshal.Copy(array, 0, temp.Data, array.Length);
var cimg = Dlib.LoadImageData<BgrPixel>(temp.Data, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()));
var faces = new List<Matrix<RgbPixel>>();
var shapes = new List<FullObjectDetection>();
foreach(var face in detector.Operator(cimg))
{
var shape = sp.Detect(img, face);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
shapes.Add(shape);
faces.Add(faceChip);
}
var faceDescriptors = net.Operator(faces);
for(int i = 0; i < faceDescriptors.Count; i++)
{
faceDiffs.Clear();
for(int j = 0; j < knownFaces.Count; j++)
{
var diff = Dlib.Length(knownFaces[j] - faceDescriptors[i]);
faceDiffs.Add(diff);
}
var minDiff = faceDiffs.Min();
var index = faceDiffs.IndexOf(minDiff);
var matchRate = 1 / (1 + minDiff);
if(matchRate > 0.5)
{
var eyeDistance = (shapes[i].GetPart(37) - shapes[i].GetPart(44)).Length;
var lipHeight = (shapes[i].GetPart(62) - shapes[i].GetPart(66)).Length;
var lipWidth = (shapes[i].GetPart(48) - shapes[i].GetPart(54)).Length;
(double, double) averageLength = movements[index].CheckMovement(lipWidth / eyeDistance * 100, lipHeight / eyeDistance * 100);
if(averageLength.Item1 > 2 || averageLength.Item2 > 2)
{
lipDiffs[index] = averageLength.Item1 + averageLength.Item2;
}
}
}
speakerName.text = knownNames[lipDiffs.IndexOf(lipDiffs.Min())];
}
yield return null;
}
public void LoadKnownFaces() {
debugLog.text = "load known faces";
string relativePath = "NRSDK/Demos/OverTheGlass/Scripts/faces";
string absolutePath = Application.dataPath + "/" + relativePath;
foreach (var file in System.IO.Directory.GetFiles(absolutePath, "*.jpg"))
{
var img = Dlib.LoadImageAsMatrix<RgbPixel>(file);
var faces = detector.Operator(img);
if (faces.Any())
{
// Detect face
var shape = sp.Detect(img, faces[0]);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
// Get face descriptor
var faceDescriptor = net.Operator(faceChip);
// Add known face and name informations
knownFaces.Add(faceDescriptor.First());
knownNames.Add(System.IO.Path.GetFileNameWithoutExtension(file));
}
else
{
Debug.Log("No face found in " + file);
}
}
lipDiffs = new List<double>(knownFaces.Count);
return;
}
internal class LipMovement
{
private string name;
private System.Collections.Generic.Queue<double> widthDiffs;
private System.Collections.Generic.Queue<double> heightDiffs;
private double prevHeight;
private double prevWidth;
public LipMovement(string name)
{
this.name = name;
widthDiffs = new System.Collections.Generic.Queue<double>(3);
heightDiffs = new System.Collections.Generic.Queue<double>(3);
prevHeight = 0;
prevWidth = 0;
}
public (double, double) CheckMovement(double width, double height)
{
heightDiffs.Enqueue(Math.Abs(prevHeight - height));
widthDiffs.Enqueue(Math.Abs(prevWidth - width));
List<double> widthNumbers = new List<double>(widthDiffs);
List<double> heightNumbers = new List<double>(heightDiffs);
double widthAverage = widthNumbers.Sum() / widthNumbers.Count;
double heightAverage = heightNumbers.Sum() / heightNumbers.Count;
prevHeight = height;
prevWidth = width;
return (Math.Round(widthAverage, 3), Math.Round(heightAverage, 3));
}
}
public void ToggleActivation()
{
isActivated = !isActivated;
if (isActivated)
{
if(camTexture == null)
{
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
}
camTexture.Play();
captureImage.texture = camTexture.GetTexture();
StartCoroutine(RecognizeSpeaker());
}
else
{
camTexture?.Stop();
camTexture = null;
StopCoroutine(RecognizeSpeaker());
}
}
void OnDestroy()
{
camTexture?.Stop();
camTexture = null;
}
}
Output:
I checked that the while statement, LoadKnownFaces() camera are running fine, but foreach(var face in detector.Opertaor(img)) doesn't seem to be running.