forked from MCG-NJU/DDT
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_missing_latents.py
More file actions
60 lines (45 loc) · 2.01 KB
/
check_missing_latents.py
File metadata and controls
60 lines (45 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
import glob
import os
from collections import Counter

from tqdm import tqdm
def check_missing_latents(
    original_root: str = "/mnt/nvme-fast/datasets/imagenet-2010",
    latent_root: str = "/mnt/nvme-fast/datasets/imagenet-2010_256_latent",
    *,
    show_progress: bool = True,
) -> list:
    """Report source images that have no matching ``.pt`` latent file.

    Every image found recursively under ``original_root`` is expected to have
    a counterpart at the same relative path under ``latent_root`` with
    ``.pt`` appended to the full file name (``n01/a.JPEG`` ->
    ``n01/a.JPEG.pt``). A summary is printed to stdout: totals, the first ten
    missing files, and a per-directory count of missing files.

    Args:
        original_root: Directory tree that holds the source images.
        latent_root: Directory tree that mirrors ``original_root`` and holds
            the ``.pt`` files.
        show_progress: When true, wrap the scan in a ``tqdm`` progress bar;
            when false, iterate silently.

    Returns:
        A list of ``(image_path, expected_latent_path)`` tuples, sorted by
        image path, one for each image whose latent file does not exist.
    """
    print("正在检查缺失的latent文件...")

    # Collect every source image. A set is used because the patterns overlap
    # ("*.JPEG" / "*.jpeg") and would yield the same file twice wherever glob
    # matching is case-insensitive; sorting makes the report deterministic.
    image_extensions = ["*.JPEG", "*.jpeg", "*.jpg", "*.png"]
    found = set()
    for ext in image_extensions:
        pattern = os.path.join(original_root, "**", ext)
        found.update(glob.glob(pattern, recursive=True))
    all_images = sorted(found)

    print(f"找到 {len(all_images)} 个原始图片文件")

    missing_latents = []
    existing_count = 0

    images = tqdm(all_images, desc="检查latent文件") if show_progress else all_images
    for img_path in images:
        # Map the image onto the latent tree through its path relative to the
        # source root. A plain str.replace() would rewrite every occurrence
        # of the root substring and lose the separator when the root is given
        # with a trailing slash.
        rel_path = os.path.relpath(img_path, original_root)
        latent_path = os.path.join(latent_root, rel_path + ".pt")

        if os.path.exists(latent_path):
            existing_count += 1
        else:
            missing_latents.append((img_path, latent_path))

    print("\n=== 检查结果 ===")
    print(f"总图片数量: {len(all_images)}")
    print(f"已有latent文件: {existing_count}")
    print(f"缺失latent文件: {len(missing_latents)}")

    if missing_latents:
        print("\n前10个缺失的文件:")
        for i, (img_path, latent_path) in enumerate(missing_latents[:10], start=1):
            print(f" {i}. 原图: {img_path}")
            print(f" latent: {latent_path}")

        # Tally missing files per class; the class is the name of the
        # directory that directly contains the image.
        missing_by_class = Counter(
            os.path.basename(os.path.dirname(img_path))
            for img_path, _ in missing_latents
        )

        print("\n按类别统计缺失文件数量:")
        for class_name, count in sorted(missing_by_class.items()):
            print(f" {class_name}: {count}")

    return missing_latents
# Script entry point: the scan runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # The list of (image, latent) pairs with no latent file is bound to a
    # module-level name.
    missing_files = check_missing_latents()