-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_samples.py
More file actions
99 lines (77 loc) · 3.26 KB
/
generate_samples.py
File metadata and controls
99 lines (77 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""
Generate sample face embeddings for testing and demonstration.
This script creates synthetic embeddings that mimic facenet-pytorch output.
"""
import numpy as np
import json
from pathlib import Path
def generate_sample_embeddings(output_dir: "Path | str",
                               n_users: int = 5,
                               samples_per_user: int = 3,
                               embedding_dim: int = 512,
                               seed: int = 42,
                               noise_std: float = 0.02) -> None:
    """
    Generate synthetic, unit-length face embeddings for testing.

    For every user a random base vector is drawn and L2-normalized. Each
    sample of that user is the base vector plus Gaussian noise, normalized
    again, and saved as ``<user_id>_<NN>.npy`` inside ``output_dir``. A
    ``metadata.json`` file listing every user and their files is written
    next to the embeddings, and a short summary is printed.

    Args:
        output_dir: Directory to save embeddings (created if missing).
            Both ``Path`` objects and plain strings are accepted.
        n_users: Number of users (must be >= 0).
        samples_per_user: Number of samples per user (must be >= 0).
        embedding_dim: Dimension of embeddings (must be >= 1).
        seed: Random seed for reproducibility.
        noise_std: Standard deviation of the per-sample Gaussian noise
            (must be >= 0). Smaller values keep the samples of one user
            closer to each other.

    Raises:
        ValueError: If any of the numeric arguments is out of range.
    """
    # Validate before touching the filesystem so that a bad call never
    # leaves a half-populated output directory behind.
    if n_users < 0:
        raise ValueError(f"n_users must be >= 0, got {n_users}")
    if samples_per_user < 0:
        raise ValueError(
            f"samples_per_user must be >= 0, got {samples_per_user}")
    if embedding_dim < 1:
        # A zero-length vector has norm 0 and cannot be normalized.
        raise ValueError(f"embedding_dim must be >= 1, got {embedding_dim}")
    if noise_std < 0:
        raise ValueError(f"noise_std must be >= 0, got {noise_std}")

    # Accept plain strings as well as Path objects.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    rng = np.random.default_rng(seed)

    metadata = {
        "description": "Synthetic face embeddings for testing",
        "n_users": n_users,
        "samples_per_user": samples_per_user,
        "embedding_dim": embedding_dim,
        "users": []
    }

    user_names = [f"user{i+1:02d}" for i in range(n_users)]

    for user_id in user_names:
        # Generate a unit-length base embedding for this user.
        base_embedding = rng.normal(0, 1, size=embedding_dim)
        base_embedding = base_embedding / np.linalg.norm(base_embedding)

        user_files = []

        # Generate variations (same person, slight variations).
        for sample_idx in range(samples_per_user):
            # Small random noise around the base vector gives the
            # intra-user variation.
            noise = rng.normal(0, noise_std, size=embedding_dim)
            embedding = base_embedding + noise

            # Re-normalize so every saved vector has unit length.
            embedding = embedding / np.linalg.norm(embedding)

            filename = f"{user_id}_{sample_idx+1:02d}.npy"
            np.save(output_dir / filename, embedding)
            user_files.append(filename)

        metadata["users"].append({
            "user_id": user_id,
            "files": user_files
        })

    # Save metadata alongside the embeddings.
    with open(output_dir / "metadata.json", 'w', encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"Generated {n_users * samples_per_user} sample embeddings")
    print(f"Saved to: {output_dir}")
    print(f"Users: {', '.join(user_names)}")
if __name__ == "__main__":
    # Command-line entry point: parse the options and generate the
    # embeddings into the requested directory.
    import argparse

    parser = argparse.ArgumentParser(description="Generate sample embeddings")
    parser.add_argument("--output", type=str, default="embeddings",
                        help="Output directory (default: embeddings/)")
    parser.add_argument("--n-users", type=int, default=5,
                        help="Number of users (default: 5)")
    parser.add_argument("--samples", type=int, default=3,
                        help="Samples per user (default: 3)")
    parser.add_argument("--dim", type=int, default=512,
                        help="Embedding dimension (default: 512)")
    # Expose the generator's seed so different (but still reproducible)
    # data sets can be produced from the command line. The default matches
    # the function's own default, so existing invocations are unaffected.
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed (default: 42)")
    args = parser.parse_args()

    generate_sample_embeddings(
        Path(args.output),
        n_users=args.n_users,
        samples_per_user=args.samples,
        embedding_dim=args.dim,
        seed=args.seed
    )