|
89 | 89 | { |
90 | 90 | "data": { |
91 | 91 | "text/plain": [ |
92 | | - "'0.921'" |
| 92 | + "'0.922'" |
93 | 93 | ] |
94 | 94 | }, |
95 | 95 | "execution_count": 1, |
|
186 | 186 | " and use it in accordance with the terms of the license.\n", |
187 | 187 | " For more information, please see: https://github.com/facebookresearch/dinov2/blob/main/LICENSE\n", |
188 | 188 | "FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n", |
189 | | - "2023-04-24 13:47:19 [INFO] Found resent/efficientnet/dinov2 model, setting up normalization\n", |
190 | | - "2023-04-24 13:47:19 [INFO] Going to loop over dir images\n", |
191 | | - "2023-04-24 13:47:19 [INFO] Found total 7390 images to run on, 7390 train, 0 test, name list 7390, counter 7390 \n", |
192 | | - "2023-04-24 13:47:26 [ERROR] Failed to read image images/Abyssinian_34.jpg\n", |
193 | | - "2023-04-24 13:49:13 [ERROR] Failed to read image images/Egyptian_Mau_139.jpg\n", |
194 | | - "2023-04-24 13:49:13 [ERROR] Failed to read image images/Egyptian_Mau_145.jpg\n", |
195 | | - "2023-04-24 13:49:15 [ERROR] Failed to read image images/Egyptian_Mau_167.jpg\n", |
196 | | - "2023-04-24 13:49:15 [ERROR] Failed to read image images/Egyptian_Mau_177.jpg\n", |
197 | | - "2023-04-24 13:49:16 [ERROR] Failed to read image images/Egyptian_Mau_191.jpg\n", |
198 | | - "2023-04-24 13:54:04 [INFO] Found total 7390 images to run on\n", |
199 | | - "Finished histogram 1.135\n", |
200 | | - "Finished bucket sort 1.155\n", |
201 | | - "2023-04-24 13:54:04 [INFO] 79) Finished write_index() NN model\n", |
202 | | - "2023-04-24 13:54:04 [INFO] Stored nn model index file fastdup_work_dir/nnf.index\n", |
203 | | - "2023-04-24 13:54:04 [INFO] Total time took 405302 ms\n", |
204 | | - "2023-04-24 13:54:04 [INFO] Found a total of 118 fully identical images (d>0.990), which are 0.53 %\n", |
205 | | - "2023-04-24 13:54:04 [INFO] Found a total of 14 nearly identical images(d>0.980), which are 0.06 %\n", |
206 | | - "2023-04-24 13:54:04 [INFO] Found a total of 511 above threshold images (d>0.900), which are 2.30 %\n", |
207 | | - "2023-04-24 13:54:04 [INFO] Found a total of 739 outlier images (d<0.050), which are 3.33 %\n", |
208 | | - "2023-04-24 13:54:04 [INFO] Min distance found 0.203 max distance 1.000\n", |
209 | | - "2023-04-24 13:54:04 [INFO] Running connected components for ccthreshold 0.800000 \n", |
| 189 | + "2023-05-02 12:09:03 [INFO] Found resent/efficientnet/dinov2 model, setting up normalization\n", |
| 190 | + "2023-05-02 12:09:03 [INFO] Going to loop over dir images\n", |
| 191 | + "2023-05-02 12:09:03 [INFO] Found total 7390 images to run on, 7390 train, 0 test, name list 7390, counter 7390 \n", |
| 192 | + "2023-05-02 12:09:10 [ERROR] Failed to read image images/Abyssinian_34.jpg\n", |
| 193 | + "2023-05-02 12:10:57 [ERROR] Failed to read image images/Egyptian_Mau_139.jpg\n", |
| 194 | + "2023-05-02 12:10:57 [ERROR] Failed to read image images/Egyptian_Mau_145.jpg\n", |
| 195 | + "2023-05-02 12:10:58 [ERROR] Failed to read image images/Egyptian_Mau_167.jpg\n", |
| 196 | + "2023-05-02 12:10:59 [ERROR] Failed to read image images/Egyptian_Mau_177.jpg\n", |
| 197 | + "2023-05-02 12:11:00 [ERROR] Failed to read image images/Egyptian_Mau_191.jpg\n", |
| 198 | + "2023-05-02 12:15:19 [INFO] Found total 7390 images to run on\n", |
| 199 | + "Finished histogram 1.164\n", |
| 200 | + "Finished bucket sort 1.183\n", |
| 201 | + "2023-05-02 12:15:20 [INFO] 90) Finished write_index() NN model\n", |
| 202 | + "2023-05-02 12:15:20 [INFO] Stored nn model index file fastdup_work_dir/nnf.index\n", |
| 203 | + "2023-05-02 12:15:20 [INFO] Total time took 376277 ms\n", |
| 204 | + "2023-05-02 12:15:20 [INFO] Found a total of 118 fully identical images (d>0.990), which are 0.53 %\n", |
| 205 | + "2023-05-02 12:15:20 [INFO] Found a total of 14 nearly identical images(d>0.980), which are 0.06 %\n", |
| 206 | + "2023-05-02 12:15:20 [INFO] Found a total of 511 above threshold images (d>0.900), which are 2.30 %\n", |
| 207 | + "2023-05-02 12:15:20 [INFO] Found a total of 739 outlier images (d<0.050), which are 3.33 %\n", |
| 208 | + "2023-05-02 12:15:20 [INFO] Min distance found 0.229 max distance 1.000\n", |
| 209 | + "2023-05-02 12:15:20 [INFO] Running connected components for ccthreshold 0.800000 \n", |
210 | 210 | ".0\n", |
211 | 211 | " ########################################################################################\n", |
212 | 212 | "\n", |
|
222 | 222 | " For a detailed analysis, use `.connected_components()`\n", |
223 | 223 | "(similarity threshold used is 0.9, connected component threshold used is 0.8).\n", |
224 | 224 | "\n", |
225 | | - " Outliers: 6.31% (466) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", |
| 225 | + " Outliers: 6.29% (465) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", |
226 | 226 | " For a detailed list of outliers, use `.outliers()`.\n" |
227 | 227 | ] |
228 | 228 | } |
|
241 | 241 | "source": [ |
242 | 242 | "## Image Clusters\n", |
243 | 243 | "\n", |
244 | | - "Let's debug the embedding quality by clustering group of similar images and visualizing them." |
| 244 | + "Let's debug the embedding quality by clustering groups of similar images and visualizing them.\n", |
| 245 | + "\n", |
| 246 | + "In the visualization below, `component` refers to the cluster number. For example:\n", |
| 247 | + "\n", |
| 248 | + "- `component` `933` refers to cluster `933` found in the dataset.\n", |
| 249 | + "\n", |
| 250 | + "- `num_images` refers to the number of images in the cluster (`component`).\n", |
| 251 | + "\n", |
| 252 | + "- `mean_distance` refers to the mean distance of all the images in the cluster (`component`)." |
245 | 253 | ] |
246 | 254 | }, |
247 | 255 | { |
|
262 | 270 | "name": "stderr", |
263 | 271 | "output_type": "stream", |
264 | 272 | "text": [ |
265 | | - "100%|███████████████████████████████████| 20/20 [00:03<00:00, 5.50it/s]\n" |
| 273 | + "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:03<00:00, 5.42it/s]\n" |
266 | 274 | ] |
267 | 275 | }, |
268 | 276 | { |
|
271 | 279 | "text": [ |
272 | 280 | "Finished OK. Components are stored as image files fastdup_work_dir/galleries/components_[index].jpg\n", |
273 | 281 | "Stored components visual view in fastdup_work_dir/galleries/components.html\n", |
274 | | - "Execution time in seconds 5.9\n" |
| 282 | + "Execution time in seconds 6.0\n" |
275 | 283 | ] |
276 | 284 | }, |
277 | 285 | { |
|
1421 | 1429 | "source": [ |
1422 | 1430 | "print(\"Feature vector matrix dimensions\", feature_vec.shape)" |
1423 | 1431 | ] |
1424 | | - }, |
1425 | | - { |
1426 | | - "cell_type": "code", |
1427 | | - "execution_count": null, |
1428 | | - "id": "2JbfqfSPTuSC", |
1429 | | - "metadata": { |
1430 | | - "id": "2JbfqfSPTuSC" |
1431 | | - }, |
1432 | | - "outputs": [], |
1433 | | - "source": [] |
1434 | 1432 | } |
1435 | 1433 | ], |
1436 | 1434 | "metadata": { |
|
0 commit comments