Skip to content

Commit b713c3d

Browse files
committed
Merge branch 'main' into dnth/visualize
2 parents de0e90b + 0cfe86b commit b713c3d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+31728
-12406
lines changed

.github/workflows/examples-ci.yml

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# Scheduled smoke tests for the fastdup examples.
#
# Each job follows the same recipe: check out the repository, install fastdup
# with pip, download one public dataset, run the matching script from
# .github/workflows/tests/, and upload fastdup_work_dir/ as a build artifact.
name: Test Run Examples

on:
  workflow_dispatch:
  schedule:
    # 16:00 UTC every day (GitHub evaluates cron schedules in UTC).
    - cron: '0 16 * * *'

# The jobs only read the repository; artifact upload needs no extra scopes.
permissions:
  contents: read

jobs:
  test-quick-dataset-analysis:
    runs-on: ${{ matrix.os }}
    env:
      # Quoted so the variable is the literal string "True", not a YAML boolean.
      SENTRY_OPT_OUT: 'True'
    strategy:
      matrix:
        # NOTE(review): this job pins ubuntu-22.04 while the others use
        # ubuntu-latest; kept as-is, confirm whether the pin is intentional.
        os: [ubuntu-22.04]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget "https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz" -O "images.tar.gz"
          tar xf "images.tar.gz"

      - name: Run example
        run: |
          python .github/workflows/tests/quick_dataset_analysis.py

      - name: Save artifacts
        # Upload the work dir even when the example step failed, for debugging.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fastdup_work_dir_quick_dataset_analysis
          path: fastdup_work_dir/

  test-cleaning-image-dataset:
    runs-on: ${{ matrix.os }}
    env:
      # Quoted so the variable is the literal string "True", not a YAML boolean.
      SENTRY_OPT_OUT: 'True'
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
          tar -xf food-101.tar.gz

      - name: Run example
        run: |
          python .github/workflows/tests/cleaning_image_dataset.py

      - name: Save artifacts
        # Upload the work dir even when the example step failed, for debugging.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fastdup_work_dir_cleaning_image_dataset
          path: fastdup_work_dir/

  test-labeled-image-classification:
    runs-on: ${{ matrix.os }}
    env:
      # Quoted so the variable is the literal string "True", not a YAML boolean.
      SENTRY_OPT_OUT: 'True'
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup matplotlib

      - name: Download dataset
        run: |
          wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
          tar -xf imagenette2-160.tgz

      - name: Run example
        run: |
          python .github/workflows/tests/labeled_image_classification.py

      - name: Save artifacts
        # Upload the work dir even when the example step failed, for debugging.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fastdup_work_dir_labeled_image_classification
          path: fastdup_work_dir/

  test-labeled-object-detection:
    runs-on: ${{ matrix.os }}
    env:
      # Quoted so the variable is the literal string "True", not a YAML boolean.
      SENTRY_OPT_OUT: 'True'
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.9']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install fastdup plotly gdown

      - name: Download dataset
        run: |
          gdown --fuzzy https://drive.google.com/file/d/1iSXVTlkV1_DhdYpVDqsjlT4NJFQ7OkyK/view
          unzip -qq coco_minitrain_25k.zip
          cd coco_minitrain_25k/annotations && gdown --fuzzy https://drive.google.com/file/d/1i12p23cXlqp1QrXjAD_vu467r4q67Mq9/view

      - name: Run example
        run: |
          python .github/workflows/tests/labeled_object_detection.py

      - name: Save artifacts
        # Upload the work dir even when the example step failed, for debugging.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: fastdup_work_dir_labeled_object_detection
          path: fastdup_work_dir/
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""Example run over the food-101 images.

Runs fastdup on up to 1000 images from ``food-101/images/`` and then calls
each gallery helper on ``fd.vis``, asking for five images per gallery.
"""
import fastdup

print(f'fastdup version: {fastdup.__version__}')

# Number of images requested from every gallery call below.
GALLERY_SIZE = 5

fd = fastdup.create(input_dir="food-101/images/", work_dir="fastdup_work_dir/")
fd.run(num_images=1000)

fd.vis.duplicates_gallery(num_images=GALLERY_SIZE)
fd.vis.component_gallery(num_images=GALLERY_SIZE)
fd.vis.outliers_gallery(num_images=GALLERY_SIZE)

# One stats gallery per image-quality metric, in the original call order.
for metric in ('dark', 'bright', 'blur'):
    fd.vis.stats_gallery(metric=metric, num_images=GALLERY_SIZE)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""Example run over imagenette2-160 with labels taken from its noisy-label CSV.

Builds an annotations frame with ``filename``, ``label`` and ``split`` columns,
passes it to ``fd.run`` and then calls the gallery helpers on ``fd.vis``.
"""
import pandas as pd

DATA_DIR = 'imagenette2-160/'
ANNOTATIONS_CSV = 'imagenette2-160/noisy_imagenette.csv'

# Label ids found in the CSV's label column -> readable class names.
CLASS_NAMES = {
    'n02979186': 'cassette_player',
    'n03417042': 'garbage_truck',
    'n01440764': 'tench',
    'n02102040': 'English_springer',
    'n03028079': 'church',
    'n03888257': 'parachute',
    'n03394916': 'French_horn',
    'n03000684': 'chain_saw',
    'n03445777': 'golf_ball',
    'n03425413': 'gas_pump',
}

# Keep only the two relevant columns and give them fastdup's column names.
annotations = (
    pd.read_csv(ANNOTATIONS_CSV)[['path', 'noisy_labels_0']]
    .rename(columns={'path': 'filename', 'noisy_labels_0': 'label'})
)

# Prefix every relative path with the dataset directory.
annotations['filename'] = [DATA_DIR + rel_path for rel_path in annotations['filename']]

# The split is the path component right after the dataset directory.
annotations['split'] = [full_path.split("/")[1] for full_path in annotations['filename']]

# Replace label ids with their readable names.
annotations['label'] = annotations['label'].map(CLASS_NAMES)


import fastdup
print(f'fastdup version: {fastdup.__version__}')

WORK_DIR = 'fastdup_work_dir'
GALLERY_SIZE = 5

fd = fastdup.create(input_dir=DATA_DIR, work_dir=WORK_DIR)
fd.run(annotations=annotations, ccthreshold=0.9, threshold=0.8)

fd.vis.duplicates_gallery(num_images=GALLERY_SIZE)
fd.vis.component_gallery(num_images=GALLERY_SIZE)
fd.vis.component_gallery(slice='chain_saw')
fd.vis.outliers_gallery(num_images=GALLERY_SIZE)
fd.vis.similarity_gallery()

# One stats gallery per image-quality metric, in the original call order.
for metric in ('dark', 'bright', 'blur'):
    fd.vis.stats_gallery(metric=metric, num_images=GALLERY_SIZE)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""Example run over the COCO minitrain subset with its annotations CSV.

Loads the header-less annotations CSV, tags every row as the ``train`` split,
prefixes filenames with the image directory, drops duplicate rows, then runs
fastdup on up to 10000 images and calls the gallery helpers on ``fd.vis``.
"""
import fastdup
print(f'fastdup version: {fastdup.__version__}')

import pandas as pd

ANNOTATIONS_CSV = 'coco_minitrain_25k/annotations/coco_minitrain2017.csv'
IMAGE_DIR = 'coco_minitrain_25k/images/train2017/'
# The CSV has no header row, so the column names are supplied here.
CSV_COLUMNS = ['filename', 'col_x', 'row_y', 'width', 'height', 'label', 'ext']

coco_annotations = (
    pd.read_csv(ANNOTATIONS_CSV, header=None, names=CSV_COLUMNS)
    .assign(split='train')  # Only train files were loaded
    .assign(filename=lambda frame: frame['filename'].map(lambda name: IMAGE_DIR + name))
    .drop_duplicates()
)

fd = fastdup.create(input_dir='.', work_dir='fastdup_work_dir')
fd.run(annotations=coco_annotations, overwrite=True, num_images=10000)

fd.vis.component_gallery(metric='size', max_width=900)
fd.vis.outliers_gallery()
fd.vis.component_gallery(num_images=25, slice='diff')
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""Example run over the extracted ``images/`` directory.

Runs fastdup on up to 10000 images and then calls each gallery helper on
``fd.vis`` with default arguments (apart from the stats metric).
"""
import fastdup

print(f'fastdup version: {fastdup.__version__}')

fd = fastdup.create(input_dir="images/", work_dir="fastdup_work_dir/")
fd.run(num_images=10000)

# Short alias for the gallery namespace; calls stay in the original order.
galleries = fd.vis
galleries.duplicates_gallery()
galleries.outliers_gallery()
galleries.stats_gallery(metric='dark')
galleries.component_gallery()
galleries.similarity_gallery()

0 commit comments

Comments
 (0)