Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
# /home/svision/experiments/recognition/insightface/recognition/arcface_torch/wandb
recognition/arcface_torch/wandb/
recognition/volumes/

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
47 changes: 47 additions & 0 deletions recognition/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Training/runtime image for the arcface_torch recognition code:
# CUDA 12.8 + cuDNN runtime base with PyTorch cu128 wheels on top.
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu22.04

# Non-interactive apt installs, no .pyc files, unbuffered stdout for
# container logs, and no pip download cache (smaller image layers).
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1

# System deps (opencv runtime libs + basic build tools)
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip python3-dev python3-venv \
git curl ca-certificates \
build-essential pkg-config \
libglib2.0-0 libsm6 libxext6 libxrender1 \
&& rm -rf /var/lib/apt/lists/*

RUN python3 -m pip install --upgrade pip setuptools wheel

# ---- PyTorch (CUDA 12.8 / cu128) ----
# NOTE(review): confirm these exact versions are published on the cu128
# wheel index; the ARGs allow overriding at build time without editing.
ARG TORCH_VERSION=2.10.0
ARG TORCHVISION_VERSION=0.25.0
ARG TORCHAUDIO_VERSION=2.10.0

# Use extra-index-url so deps can still come from PyPI
RUN python3 -m pip install \
torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
--extra-index-url https://download.pytorch.org/whl/cu128

# ---- Your packages ----
# numpy pinned <1.24 presumably for mxnet compatibility -- TODO confirm;
# headless opencv avoids pulling GUI libraries into the image.
RUN python3 -m pip install \
"numpy<1.24" \
tensorboard \
easydict \
mxnet \
onnx \
scikit-learn \
tqdm \
wandb \
"opencv-python-headless<4.10"

# Put code into the image
WORKDIR /workspace
COPY . /workspace

# If the repo has a requirements.txt, uncomment:
# RUN python3 -m pip install -r requirements.txt

# Interactive shell by default; training commands are run manually.
CMD ["bash"]
2 changes: 2 additions & 0 deletions recognition/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## Face Recognition

This is a fork of the original InsightFace repo, which I modified and used to train face recognition models.
Many links and descriptions remain the same as in the original repo; I've just added some utility scripts and files needed to create the training / testing environment.

<div align="left">
<img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="240"/>
Expand Down
2 changes: 1 addition & 1 deletion recognition/arcface_torch/backbones/iresnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
return nn.Sequential(*layers)

def forward(self, x):
with torch.cuda.amp.autocast(self.fp16):
with torch.amp.autocast('cuda', enabled=self.fp16):
x = self.conv1(x)
x = self.bn1(x)
x = self.prelu(x)
Expand Down
37 changes: 37 additions & 0 deletions recognition/arcface_torch/configs/merged_ms1m_glint_r100.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Training config for ArcFace r100 on the merged MS1MV3 + Glint360K dataset.

All hyper-parameters are attributes of the module-level ``config`` EasyDict,
consumed by the arcface_torch training entry point.
"""
import os

from easydict import EasyDict as edict

config = edict()

# Combined margin loss (m1, m2, m3): m2=0.5 is plain ArcFace.
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r100"
config.resume = False
config.output = "/output/merged_ms1m_glint_r100"
config.embedding_size = 512
# Partial-FC sampling rate: 20% of class centers participate per step.
config.sample_rate = 0.2
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 1e-4
config.batch_size = 320
config.lr = 0.1
# Log/validate every `verbose` global steps.
config.verbose = 2000
config.dali = False
# Tune to CPU core count: ~12 workers for 16 cores, 24-28 for 32 cores.
config.num_workers = 24

# Dataset in MXNet RecordIO layout (train.rec / train.idx).
config.rec = "/datasets/merged_ms1m_glint"
config.num_classes = 453663
config.num_image = 22271167
config.num_epoch = 20
config.warmup_epoch = 2
config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
# Save only the backbone state (not partial fc / optimizer) to save space.
config.save_all_states = False

# Weights & Biases logging; credentials come from the environment.
config.using_wandb = True
config.wandb_key = os.getenv("WANDB_API_KEY")
config.wandb_entity = os.getenv("WANDB_ENTITY")
config.wandb_project = os.getenv("WANDB_PROJECT")
config.wandb_log_all = True
config.wandb_resume = False
config.suffix_run_name = "merged_ms1m_glint_r100"
config.notes = "Training r100 on merged MS1MV3 + Glint360K dataset"

6 changes: 3 additions & 3 deletions recognition/arcface_torch/configs/ms1mv2_r100.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
config.margin_list = (1.0, 0.5, 0.0)
config.network = "r100"
config.resume = False
config.output = None
config.output = "/output/ms1mv2_r100"
config.embedding_size = 512
config.sample_rate = 1.0
config.fp16 = True
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 128
config.batch_size = 64
config.lr = 0.1
config.verbose = 2000
config.dali = False

config.rec = "/train_tmp/faces_emore"
config.rec = "/datasets/faces_emore"
config.num_classes = 85742
config.num_image = 5822653
config.num_epoch = 20
Expand Down
2 changes: 1 addition & 1 deletion recognition/arcface_torch/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_dataloader(
dali = False,
dali_aug = False,
seed = 2048,
num_workers = 2,
num_workers = 4,
) -> Iterable:

rec = os.path.join(root_dir, 'train.rec')
Expand Down
4 changes: 2 additions & 2 deletions recognition/arcface_torch/lr_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import warnings

class PolynomialLRWarmup(_LRScheduler):
def __init__(self, optimizer, warmup_iters, total_iters=5, power=1.0, last_epoch=-1, verbose=False):
super().__init__(optimizer, last_epoch=last_epoch, verbose=verbose)
def __init__(self, optimizer, warmup_iters, total_iters=5, power=1.0, last_epoch=-1, **kwargs):
super().__init__(optimizer, last_epoch=last_epoch)
self.total_iters = total_iters
self.power = power
self.warmup_iters = warmup_iters
Expand Down
2 changes: 1 addition & 1 deletion recognition/arcface_torch/partial_fc_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def forward(
else:
weight = self.weight

with torch.cuda.amp.autocast(self.fp16):
with torch.amp.autocast('cuda', enabled=self.fp16):
norm_embeddings = normalize(embeddings)
norm_weight_activated = normalize(weight)
logits = linear(norm_embeddings, norm_weight_activated)
Expand Down
94 changes: 94 additions & 0 deletions recognition/arcface_torch/svision_scripts/copy_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import pysftp
import sys, os
import time
import logging
import glob

from multiprocessing import Pool
from multiprocessing import cpu_count
import numpy as np
import multiprocessing

# Keep the logging module from raising its own exceptions mid-copy.
logging.raiseExceptions = False


def chunk(l, n):
    """Yield consecutive slices of *l*, each at most *n* elements long."""
    for lo in range(0, len(l), n):
        yield l[lo:lo + n]

def copy_func(payloads):
    """Upload one chunk of files over SFTP.

    payloads: dict with
        "input_paths": list of local file paths to upload
        "output_path": remote directory prefix (must end with "/")

    Uses the module-level SFTP credentials and cnopts; runs inside a
    multiprocessing.Pool worker, one connection per worker.
    """
    input_paths = payloads["input_paths"]
    output_path = payloads["output_path"]
    total = len(input_paths)
    with pysftp.Connection(host=myHostname, username=myUsername,
                           password=myPassword, cnopts=cnopts) as sftp:
        print("Connection succesfully stablished ... ")
        for done, input_path in enumerate(input_paths):
            # NOTE(review): assumes POSIX-style local paths; remote name is
            # output_path + basename, so output_path needs a trailing "/".
            sftp.put(input_path, output_path + input_path.split("/")[-1])
            # Progress every 1000 files (and once at the start).
            if done % 1000 == 0:
                print("LEFT {}".format(total - done))

# Target SFTP host; values can be overridden via the environment so the
# credentials don't have to live in source control.
# SECURITY(review): the hard-coded fallbacks below are leaked in the repo
# history -- rotate them and rely on the env vars instead.
myHostname = os.getenv("SFTP_HOST", "10.16.107.15")
myUsername = os.getenv("SFTP_USER", "umai")
myPassword = os.getenv("SFTP_PASSWORD", "passw0rd13!")

cnopts = pysftp.CnOpts()
# Host-key verification disabled -- acceptable only on a trusted LAN.
cnopts.hostkeys = None

# Local source root and remote destination root.
folders = '/home/umai/'
remote_folder = '/photo/'

folders_lst = ['ud_gr_photos']

print('Amount of folders:', len(folders_lst))

for folder_name in folders_lst:
    # Source directory holding the photos for this folder.
    path = folders + folder_name + '/'
    print('Source:', path)
    file_type = '*.ldr'
    remote_path = remote_folder + folder_name + '/'
    # NOTE(review): makedirs creates this path *locally*, while sftp.put
    # writes to the same path on the remote host -- confirm the remote
    # directory actually exists before running.
    if not os.path.exists(remote_path):
        os.makedirs(remote_path)
    print('Destination:', remote_path)

    pictures = sorted(glob.glob(path + file_type))
    print(len(pictures))

    procs = cpu_count()

    # One roughly equal chunk of files per process.  max(1, ...) guards the
    # empty-list case, which would otherwise make chunk() step by zero and
    # raise ValueError.
    pictures_per_proc = max(1, int(np.ceil(len(pictures) / float(procs))))

    chunked_paths = list(chunk(pictures, pictures_per_proc))

    payloads = [{"input_paths": image_paths, "output_path": remote_path}
                for image_paths in chunked_paths]

    start = time.time()

    pool = Pool(processes=procs)
    pool.map(copy_func, payloads)

    print("[INFO] waiting for processes to finish...")

    pool.close()
    pool.join()

    print(time.time() - start)
103 changes: 103 additions & 0 deletions recognition/arcface_torch/svision_scripts/copy_files_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#import pysftp
import sys, os
import time
import logging
import glob
import shutil

from multiprocessing import Pool
from multiprocessing import cpu_count
# import numpy as np
import multiprocessing
import math

# Suppress exceptions raised by the logging machinery itself.
logging.raiseExceptions = False


def chunk(l, n):
    """Split list *l* into successive chunks of at most *n* items each."""
    for offset in range(0, len(l), n):
        yield l[offset:offset + n]

def copy_func(payloads):
    """Copy one chunk of files into class-named subdirectories.

    payloads: dict with
        "input_paths": list of local file paths ".../<class_dir>/<file>"
        "output_path": destination root; each file lands at
                       <output_path>/<class_dir>/<file>

    Prints progress (files left + elapsed seconds) every 1000 files.
    """
    input_paths = payloads["input_paths"]
    output_root = payloads["output_path"]
    total = len(input_paths)
    started = time.time()
    for done, input_path in enumerate(input_paths):
        # Preserve the immediate parent directory (the identity/class dir).
        class_dir = input_path.split("/")[-2]
        dst_dir = os.path.join(output_root, class_dir)
        os.makedirs(dst_dir, exist_ok=True)
        # copy2 keeps timestamps/metadata for a faithful dataset copy.
        shutil.copy2(input_path, os.path.join(dst_dir, input_path.split("/")[-1]))
        if done % 1000 == 0:
            print("LEFT {}".format(total - done))
            print(time.time() - started)

# Local dataset copy: fan the merged_ms1m_glint image tree out across all
# CPU cores with multiprocessing, one chunk of files per worker process.
# The pysftp remote-copy path (see copy_files.py) was removed along with
# its hard-coded credentials; this script does local disk-to-disk copies.

# Source root: one subdirectory per identity, images inside.
path = '/data/datasets/recognition/merged_ms1m_glint/'
# Glob pattern matching <identity_dir>/<image> under the source root.
file_type = '*/*'
# Destination root; per-identity subdirectories are created by copy_func.
remote_path = '/home/svision/datasets/merged_ms1m_glint_copy/'

pictures = sorted(glob.glob(path + file_type))
print(len(pictures))

procs = cpu_count()

# One roughly equal chunk of files per process.  max(1, ...) guards the
# empty-list case, which would otherwise make chunk() step by zero and
# raise ValueError.
pictures_per_proc = max(1, int(math.ceil(len(pictures) / float(procs))))
print(pictures_per_proc)

chunked_paths = list(chunk(pictures, pictures_per_proc))

payloads = [{"input_paths": image_paths, "output_path": remote_path}
            for image_paths in chunked_paths]

start = time.time()

pool = Pool(processes=procs)
pool.map(copy_func, payloads)

print("[INFO] waiting for processes to finish...")

pool.close()
pool.join()

print(time.time() - start)
Loading