Skip to content

Commit 0122b1e

Browse files
authored
add classification and object det
1 parent 2efa2b3 commit 0122b1e

File tree

3 files changed

+150
-0
lines changed

3 files changed

+150
-0
lines changed

.github/workflows/examples-ci.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,83 @@ jobs:
8383
uses: actions/upload-artifact@v3
8484
with:
8585
name: fastdup_work_dir_cleaning_image_dataset
86+
path: fastdup_work_dir/
87+
88+
# Runs the labeled image-classification example on the Imagenette (160px)
# dataset and uploads the directory the example writes its results to.
test-labeled-image-classification:
  runs-on: ${{ matrix.os }}
  env:
    # Quoted so the value is passed as the literal string 'True' instead of
    # being typed as a YAML boolean.
    SENTRY_OPT_OUT: 'True'
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: ['3.9']
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        fetch-depth: 0

    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install fastdup matplotlib

    - name: Download dataset
      run: |
        wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
        tar -xf imagenette2-160.tgz

    - name: Run example
      run: |
        python .github/workflows/tests/labeled_image_classification.py

    - name: Save artifacts
      uses: actions/upload-artifact@v3
      with:
        name: fastdup_work_dir_labeled_image_classification
        # Must match the work_dir the example script passes to
        # fastdup.create ('fastdup_imagenette'); the previous value
        # 'fastdup_work_dir/' is never created by this job.
        path: fastdup_imagenette/
126+
127+
# Runs the labeled object-detection example on the COCO minitrain (25k)
# dataset and uploads the directory the example writes its results to.
test-labeled-object-detection:
  runs-on: ${{ matrix.os }}
  env:
    # Quoted so the value is passed as the literal string 'True' instead of
    # being typed as a YAML boolean.
    SENTRY_OPT_OUT: 'True'
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: ['3.9']
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        fetch-depth: 0

    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install fastdup plotly gdown

    - name: Download dataset
      run: |
        gdown --fuzzy https://drive.google.com/file/d/1iSXVTlkV1_DhdYpVDqsjlT4NJFQ7OkyK/view
        unzip -qq coco_minitrain_25k.zip
        # Second download lands inside the annotations directory; presumably
        # this is the CSV the example script reads from there.
        cd coco_minitrain_25k/annotations && gdown --fuzzy https://drive.google.com/file/d/1i12p23cXlqp1QrXjAD_vu467r4q67Mq9/view

    - name: Run example
      run: |
        python .github/workflows/tests/labeled_object_detection.py

    - name: Save artifacts
      uses: actions/upload-artifact@v3
      with:
        name: fastdup_work_dir_labeled_object_detection
        # Must match the work_dir the example script passes to
        # fastdup.create ('fastdup_minicoco'); the previous value
        # 'fastdup_work_dir/' is never created by this job.
        path: fastdup_minicoco/
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import pandas as pd

# Dataset root and the annotation CSV that lives inside it.
data_dir = 'imagenette2-160/'
csv_path = 'imagenette2-160/noisy_imagenette.csv'

# Label ids used in the CSV -> human-readable class names.
label_map = {
    'n02979186': 'cassette_player',
    'n03417042': 'garbage_truck',
    'n01440764': 'tench',
    'n02102040': 'English_springer',
    'n03028079': 'church',
    'n03888257': 'parachute',
    'n03394916': 'French_horn',
    'n03000684': 'chain_saw',
    'n03445777': 'golf_ball',
    'n03425413': 'gas_pump',
}

# Load the CSV, keep only the two columns of interest and give them the
# column names fastdup expects.
annotations = (
    pd.read_csv(csv_path)[['path', 'noisy_labels_0']]
    .rename(columns={'noisy_labels_0': 'label', 'path': 'filename'})
)

# Prefix every relative path with the dataset root.
annotations['filename'] = [data_dir + rel_path for rel_path in annotations['filename']]

# The split name is the second path component of the prefixed filename.
annotations['split'] = [full_path.split("/")[1] for full_path in annotations['filename']]

# Replace label ids with readable class names.
annotations['label'] = annotations['label'].map(label_map)


import fastdup
print(f'fastdup version: {fastdup.__version__}')

# Run fastdup over the dataset using the prepared annotations.
fd = fastdup.create(work_dir='fastdup_imagenette', input_dir=data_dir)
fd.run(annotations=annotations, ccthreshold=0.9, threshold=0.8)

# Build the galleries in the same order as before.
fd.vis.duplicates_gallery(num_images=5)
fd.vis.component_gallery(num_images=5)
fd.vis.component_gallery(slice='chain_saw')
fd.vis.outliers_gallery(num_images=5)
fd.vis.similarity_gallery()

for stat_name in ('dark', 'bright', 'blur'):
    fd.vis.stats_gallery(metric=stat_name, num_images=5)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import fastdup
print(f'fastdup version: {fastdup.__version__}')

import pandas as pd

# The annotation CSV is read with header=None, so column names are supplied
# explicitly.
column_names = ['filename', 'col_x', 'row_y', 'width', 'height', 'label', 'ext']
annotations = pd.read_csv(
    'coco_minitrain_25k/annotations/coco_minitrain2017.csv',
    header=None,
    names=column_names,
)

# Only train files were loaded
annotations['split'] = 'train'

# Prefix each file name with the images directory, then drop repeated rows.
annotations['filename'] = [
    'coco_minitrain_25k/images/train2017/' + image_name
    for image_name in annotations['filename']
]
annotations = annotations.drop_duplicates()

# Run fastdup from the current directory, writing into 'fastdup_minicoco'.
fd = fastdup.create(work_dir='fastdup_minicoco', input_dir='.')
fd.run(annotations=annotations, overwrite=True, num_images=10000)

# Build the galleries in the same order as before.
fd.vis.component_gallery(metric='size', max_width=900)
fd.vis.outliers_gallery()
fd.vis.component_gallery(num_images=25, slice='diff')

0 commit comments

Comments
 (0)