Add files via upload

bubbliiiing · web-flow · commit 528aacc573ee · 2022-01-13T01:12:52.000+08:00
diff --git a/kmeans_for_anchors.py b/kmeans_for_anchors.py
@@ -1,29 +1,32 @@
-#-------------------------------------------------------------------------------------------------#
+#-------------------------------------------------------------------------------------------------------#
 #   kmeans虽然会对数据集中的框进行聚类，但是很多数据集由于框的大小相近，聚类出来的9个框相差不大，
-#   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框，越浅的特征层适合越大的先验框
+#   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框，shape越小的特征层适合越大的先验框
 #   原始网络的先验框已经按大中小比例分配好了，不进行聚类也会有非常好的效果。
-#-------------------------------------------------------------------------------------------------#
+#-------------------------------------------------------------------------------------------------------#
 import glob
 import xml.etree.ElementTree as ET
 
+import matplotlib.pyplot as plt
 import numpy as np
+from tqdm import tqdm
 
-def cas_iou(box,cluster):
-    x = np.minimum(cluster[:,0],box[0])
-    y = np.minimum(cluster[:,1],box[1])
+
+def cas_iou(box, cluster):
+    x = np.minimum(cluster[:, 0], box[0])
+    y = np.minimum(cluster[:, 1], box[1])
 
     intersection = x * y
     area1 = box[0] * box[1]
 
     area2 = cluster[:,0] * cluster[:,1]
-    iou = intersection / (area1 + area2 -intersection)
+    iou = intersection / (area1 + area2 - intersection)
 
     return iou
 
-def avg_iou(box,cluster):
-    return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])])
+def avg_iou(box, cluster):
+    return np.mean([np.max(cas_iou(box[i], cluster)) for i in range(box.shape[0])])
 
-def kmeans(box,k):
+def kmeans(box, k):
     #-------------------------------------------------------------#
     #   取出一共有多少框
     #-------------------------------------------------------------#
@@ -32,30 +35,32 @@ def kmeans(box,k):
     #-------------------------------------------------------------#
     #   每个框各个点的位置
     #-------------------------------------------------------------#
-    distance = np.empty((row,k))
+    distance = np.empty((row, k))
     
     #-------------------------------------------------------------#
     #   最后的聚类位置
     #-------------------------------------------------------------#
-    last_clu = np.zeros((row,))
+    last_clu = np.zeros((row, ))
 
     np.random.seed()
 
     #-------------------------------------------------------------#
     #   随机选5个当聚类中心
     #-------------------------------------------------------------#
-    cluster = box[np.random.choice(row,k,replace = False)]
+    cluster = box[np.random.choice(row, k, replace = False)]
+
+    iter = 0
     while True:
         #-------------------------------------------------------------#
-        #   计算每一行距离五个点的iou情况。
+        #   计算当前框和先验框的宽高比例
         #-------------------------------------------------------------#
         for i in range(row):
-            distance[i] = 1 - cas_iou(box[i],cluster)
+            distance[i] = 1 - cas_iou(box[i], cluster)
         
         #-------------------------------------------------------------#
         #   取出最小点
         #-------------------------------------------------------------#
-        near = np.argmin(distance,axis=1)
+        near = np.argmin(distance, axis=1)
 
         if (last_clu == near).all():
             break
@@ -68,18 +73,21 @@ def kmeans(box,k):
                 box[near == j],axis=0)
 
         last_clu = near
+        if iter % 5 == 0:
+            print('iter: {:d}. avg_iou:{:.2f}'.format(iter, avg_iou(box, cluster)))
+        iter += 1
 
-    return cluster
+    return cluster, near
 
 def load_data(path):
     data = []
     #-------------------------------------------------------------#
     #   对于每一个xml都寻找box
     #-------------------------------------------------------------#
-    for xml_file in glob.glob('{}/*xml'.format(path)):
-        tree = ET.parse(xml_file)
-        height = int(tree.findtext('./size/height'))
-        width = int(tree.findtext('./size/width'))
+    for xml_file in tqdm(glob.glob('{}/*xml'.format(path))):
+        tree    = ET.parse(xml_file)
+        height  = int(tree.findtext('./size/height'))
+        width   = int(tree.findtext('./size/width'))
         if height<=0 or width<=0:
             continue
         
@@ -97,42 +105,59 @@ def load_data(path):
             xmax = np.float64(xmax)
             ymax = np.float64(ymax)
             # 得到宽高
-            data.append([xmax-xmin,ymax-ymin])
+            data.append([xmax - xmin, ymax - ymin])
     return np.array(data)
 
-
 if __name__ == '__main__':
+    np.random.seed(0)
     #-------------------------------------------------------------#
     #   运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml
     #   会生成yolo_anchors.txt
     #-------------------------------------------------------------#
-    SIZE        = 416
+    input_shape = [416, 416]
     anchors_num = 9
     #-------------------------------------------------------------#
     #   载入数据集，可以使用VOC的xml
     #-------------------------------------------------------------#
-    path        = r'./VOCdevkit/VOC2007/Annotations'
+    path        = 'VOCdevkit/VOC2007/Annotations'
     
     #-------------------------------------------------------------#
     #   载入所有的xml
     #   存储格式为转化为比例后的width,height
     #-------------------------------------------------------------#
+    print('Load xmls.')
     data = load_data(path)
+    print('Load xmls done.')
     
     #-------------------------------------------------------------#
     #   使用k聚类算法
     #-------------------------------------------------------------#
-    out = kmeans(data,anchors_num)
-    out = out[np.argsort(out[:,0])]
-    print('acc:{:.2f}%'.format(avg_iou(data,out) * 100))
-    print(out*SIZE)
-    data = out*SIZE
+    print('K-means boxes.')
+    cluster, near   = kmeans(data, anchors_num)
+    print('K-means boxes done.')
+    data            = data * np.array([input_shape[1], input_shape[0]])
+    cluster         = cluster * np.array([input_shape[1], input_shape[0]])
+
+    #-------------------------------------------------------------#
+    #   绘图
+    #-------------------------------------------------------------#
+    for j in range(anchors_num):
+        plt.scatter(data[near == j][:,0], data[near == j][:,1])
+        plt.scatter(cluster[j][0], cluster[j][1], marker='x', c='black')
+    plt.show()
+    plt.savefig("kmeans_for_anchors.jpg")
+    print('Save kmeans_for_anchors.jpg in root dir.')
+
+    cluster = cluster[np.argsort(cluster[:, 0] * cluster[:, 1])]
+    print('avg_ratio:{:.2f}'.format(avg_iou(data, cluster)))
+    print(data)
+
     f = open("yolo_anchors.txt", 'w')
-    row = np.shape(data)[0]
+    row = np.shape(cluster)[0]
     for i in range(row):
         if i == 0:
-            x_y = "%d,%d" % (data[i][0], data[i][1])
+            x_y = "%d,%d" % (cluster[i][0], cluster[i][1])
         else:
-            x_y = ", %d,%d" % (data[i][0], data[i][1])
+            x_y = ", %d,%d" % (cluster[i][0], cluster[i][1])
         f.write(x_y)
     f.close()