
Commit 3183e89 ("fixing")

1 parent: a45c0d2

14 files changed: +1274 −327 lines

fastdup/__init__.py

Lines changed: 149 additions & 47 deletions
Large diffs are not rendered by default.

fastdup/captions.py

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@

from fastdup.sentry import fastdup_capture_exception
from fastdup.definitions import MISSING_LABEL
from fastdup.galleries import fastdup_imread
from tqdm import tqdm
import cv2


def generate_labels(filenames, kwargs):
    try:
        from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
        import torch
    except Exception as e:
        fastdup_capture_exception("Auto generate labels", e)
        print("For auto-captioning images, please install the transformers and torch packages using `pip install transformers torch`")
        return [MISSING_LABEL] * len(filenames)

    try:
        from PIL import Image
        model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
        feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
        tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        max_length = 16
        num_beams = 4
        gen_kwargs = {"max_length": max_length, "num_beams": num_beams}

        # Read all images up front. Note: unreadable images are appended as None,
        # which the feature extractor cannot process.
        images = []
        for image_path in tqdm(filenames):
            i_image = fastdup_imread(image_path, None, kwargs=kwargs)
            if i_image is not None:
                i_image = cv2.cvtColor(i_image, cv2.COLOR_BGR2RGB)
                im_pil = Image.fromarray(i_image)
                images.append(im_pil)
            else:
                images.append(None)

        pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
        pixel_values = pixel_values.to(device)
        output_ids = model.generate(pixel_values, **gen_kwargs)

        preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        preds = [pred.strip() for pred in preds]
        return preds
    except Exception as e:
        fastdup_capture_exception("Auto caption image", e)
        return [MISSING_LABEL] * len(filenames)


def generate_blip_labels(filenames, kwargs):
    try:
        from transformers import BlipProcessor, BlipForConditionalGeneration
        from PIL import Image
    except Exception as e:
        fastdup_capture_exception("Auto generate labels", e)
        print("For auto-captioning images, please install the transformers package using `pip install transformers`")
        return [MISSING_LABEL] * len(filenames)

    try:
        processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
        model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
        preds = []
        for image_path in tqdm(filenames):
            i_image = fastdup_imread(image_path, None, kwargs=kwargs)
            if i_image is not None:
                i_image = cv2.cvtColor(i_image, cv2.COLOR_BGR2RGB)
                im_pil = Image.fromarray(i_image)
                inputs = processor(im_pil, return_tensors="pt")
                out = model.generate(**inputs)
                preds.append(processor.decode(out[0], skip_special_tokens=True))
            else:
                preds.append(MISSING_LABEL)
        return preds

    except Exception as e:
        fastdup_capture_exception("Auto caption image blip", e)
        return [MISSING_LABEL] * len(filenames)


def generate_blip2_labels(filenames, kwargs, text=None):
    try:
        from transformers import Blip2Processor, Blip2Model
        from PIL import Image
        import torch
    except Exception as e:
        fastdup_capture_exception("Auto generate labels", e)
        print("For auto-captioning images, please install the transformers and torch packages using `pip install transformers torch`")
        return [MISSING_LABEL] * len(filenames)

    try:
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
        model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)
        preds = []
        for image_path in tqdm(filenames):
            i_image = fastdup_imread(image_path, None, kwargs=kwargs)
            if i_image is not None:
                i_image = cv2.cvtColor(i_image, cv2.COLOR_BGR2RGB)
                im_pil = Image.fromarray(i_image)
                inputs = processor(images=im_pil, text=text, return_tensors="pt").to(device, torch.float16)
                generated_ids = model.generate(**inputs)
                generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
                preds.append(generated_text)
            else:
                preds.append(MISSING_LABEL)
        return preds

    except Exception as e:
        fastdup_capture_exception("Auto caption image blip", e)
        return [MISSING_LABEL] * len(filenames)


def generate_vqa_labels(filenames, text, kwargs):
    try:
        from transformers import ViltProcessor, ViltForQuestionAnswering
        from PIL import Image
    except Exception as e:
        fastdup_capture_exception("Auto generate labels", e)
        print("For auto-captioning images, please install the transformers package using `pip install transformers`")
        return [MISSING_LABEL] * len(filenames)

    try:
        preds = []
        processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
        model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
        for image_path in tqdm(filenames):
            i_image = fastdup_imread(image_path, None, kwargs=kwargs)
            if i_image is not None:
                i_image = cv2.cvtColor(i_image, cv2.COLOR_BGR2RGB)
                im_pil = Image.fromarray(i_image)
                encoding = processor(im_pil, text, return_tensors="pt")

                # forward pass
                outputs = model(**encoding)
                logits = outputs.logits
                idx = logits.argmax(-1).item()
                preds.append(model.config.id2label[idx])
            else:
                preds.append(MISSING_LABEL)

        return preds

    except Exception as e:
        fastdup_capture_exception("Auto caption image vqa", e)
        return [MISSING_LABEL] * len(filenames)


def generate_age_labels(filenames, kwargs):
    from transformers import ViTFeatureExtractor, ViTForImageClassification
    # Init model and transforms
    model = ViTForImageClassification.from_pretrained('nateraw/vit-age-classifier')
    transforms = ViTFeatureExtractor.from_pretrained('nateraw/vit-age-classifier')

    try:
        preds = []
        for image_path in tqdm(filenames):
            i_image = fastdup_imread(image_path, None, kwargs=kwargs)

            # Transform our image and pass it through the model
            inputs = transforms(i_image, return_tensors='pt')
            output = model(**inputs)

            # Predicted class probabilities
            proba = output.logits.softmax(1)

            # Predicted class
            pred = int(proba.argmax(1)[0].int())
            preds.append(model.config.id2label[pred])
        return preds
    except Exception as e:
        fastdup_capture_exception("Age label", e)
        return [MISSING_LABEL] * len(filenames)
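
These helpers can also be called on their own. Below is a minimal usage sketch: the function names and signatures come from the file above, while the file paths, the empty kwargs dict, and the VQA question string are illustrative only.

    # Usage sketch (paths, kwargs and question text are illustrative, not from the commit)
    from fastdup.captions import generate_blip_labels, generate_vqa_labels

    files = ["images/cat.jpg", "images/dog.jpg"]        # hypothetical image paths
    captions = generate_blip_labels(files, kwargs={})   # one caption string per file
    answers = generate_vqa_labels(files, "is the photo taken indoors or outdoors?", kwargs={})
    # Files that cannot be read come back as MISSING_LABEL instead of raising.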

fastdup/definitions.py

Lines changed: 91 additions & 1 deletion
@@ -47,7 +47,7 @@
DEFUALT_METRIC_ZERO = 0
DEFAULT_METRIC_MINUS_ONE = -1
-VERSION__ = "0.912"
+VERSION__ = "0.927"

GITHUB_URL = "https://github.com/visual-layer/fastdup/issues"

@@ -72,6 +72,96 @@
SELECTION_STRATEGY_UNIFORM_METRIC = 2

YOLOV5S_MODEL = "https://github.com/itsnine/yolov5-onnxruntime/raw/master/models/yolov5s.onnx"
+DINOV2S_MODEL = "https://vl-company-website.s3.us-east-2.amazonaws.com/model_artifacts/dinov2/dinov2_vits14.onnx"
+DINOV2S_MODEL_DIM = 384
+DINOV2B_MODEL = "https://vl-company-website.s3.us-east-2.amazonaws.com/model_artifacts/dinov2/dinov2_vitb14.onnx"
+DINOV2B_MODEL_DIM = 768
+
+CAPTION_MODEL1_NAME = 'automatic'
+CAPTION_MODEL2_NAME = 'blip'
+CAPTION_MODEL3_NAME = 'blip2'
+VQA_MODEL1_NAME = "indoors_outdoors"
+AGE_LABEL1_NAME = 'age'
+
+# dtypes
+IMG = 'image'
+BBOX = 'bbox'
+
+# run modes
+MODE_DEFAULT = 'default'
+MODE_EMBEDDING = 'embedding'
+MODE_CROP = 'crop'
+MODE_ROTATED_BBOX = 'rotated'
+
+# fastdup files
+SPLITS_CSV = 'splits_found.json'
+MAPPING_CSV = 'atrain_features.dat.csv'
+BAD_CSV = 'atrain_features.bad.csv'
+FEATURES_DATA = 'atrain_features.dat'
+STATS_CSV = 'atrain_stats.csv'
+CONFIG_JSON = 'config.json'
+CROPS_CSV = 'atrain_crops.csv'
+
+# extra files
+BBOX_INPUT_CSV = 'objects_annot_fastdup_input.csv'
+ANNOT_PKL = 'full_annot.pkl.gz'
+IMG_GRP_ANNOT_PKL = 'img_grouped_annot.pkl.gz'
+
+# annotation expected columns
+ANNOT_FILENAME = 'filename'
+ANNOT_CROP_FILENAME = 'crop_filename'
+ANNOT_IMG_ID = 'img_id'
+ANNOT_IMG_H = 'img_h'
+ANNOT_IMG_W = 'img_w'
+ANNOT_BBOX_X = 'col_x'
+ANNOT_BBOX_Y = 'row_y'
+ANNOT_BBOX_W = 'width'
+ANNOT_BBOX_H = 'height'
+ANNOT_ROT_BBOX_X1 = 'x1'
+ANNOT_ROT_BBOX_Y1 = 'y1'
+ANNOT_ROT_BBOX_X2 = 'x2'
+ANNOT_ROT_BBOX_Y2 = 'y2'
+ANNOT_ROT_BBOX_X3 = 'x3'
+ANNOT_ROT_BBOX_Y3 = 'y3'
+ANNOT_ROT_BBOX_X4 = 'x4'
+ANNOT_ROT_BBOX_Y4 = 'y4'
+ANNOT_SPLIT = 'split'
+ANNOT_ERROR = 'error_code'
+ANNOT_LABEL = 'label'
+
+# extended annotation columns
+ANNOT_VALID = 'is_valid'
+ANNOT_FD_ID = 'index'
+
+# connected components columns
+CC_INST_ID = '__id'
+CC_UNI_SPLIT = 'uni_split'
+CC_BI_SPLIT = 'bi_split'
+
+# bad files columns
+BAD_FILENAME = 'filename'
+BAD_ERROR = 'error_code'
+BAD_FD_ID = 'index'
+
+# similarity columns
+SIM_SRC_IMG = 'from'
+SIM_DST_IMG = 'to'
+SIM_SCORE = 'distance'
+
+# outliers columns
+OUT_ID = 'outlier'
+OUT_NEAREST_NEIGHBOR = 'nearest'
+OUT_SCORE = 'distance'
+
+# stats columns
+STATS_INST_ID = 'index'
+
+# map file columns
+MAP_INST_ID = 'index'
+MAP_FILENAME = 'filename'
+
+ERROR_MISSING_IMAGE = 'ERROR_MISSING_FILE'
+ERROR_BAD_BOUNDING_BOX = 'ERROR_BAD_BOUNDING_BOX'

def get_sep():
    return os.sep
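
The caption model names added above line up one-to-one with the helpers in fastdup/captions.py. The real wiring lives in fastdup/__init__.py, whose diff is not rendered here; the sketch below is a hypothetical dispatcher (the function auto_label and the VQA question string are illustrative, not the commit's actual code), shown only to make the relationship between the constants and the helpers concrete.

    # Hypothetical dispatcher: auto_label() is not part of this commit.
    from fastdup import captions
    from fastdup.definitions import (AGE_LABEL1_NAME, CAPTION_MODEL1_NAME,
                                     CAPTION_MODEL2_NAME, CAPTION_MODEL3_NAME,
                                     VQA_MODEL1_NAME)

    def auto_label(filenames, model_name, kwargs):
        if model_name == CAPTION_MODEL1_NAME:    # 'automatic' -> ViT-GPT2 captioning
            return captions.generate_labels(filenames, kwargs)
        if model_name == CAPTION_MODEL2_NAME:    # 'blip'
            return captions.generate_blip_labels(filenames, kwargs)
        if model_name == CAPTION_MODEL3_NAME:    # 'blip2'
            return captions.generate_blip2_labels(filenames, kwargs)
        if model_name == VQA_MODEL1_NAME:        # VQA; question text is illustrative
            return captions.generate_vqa_labels(filenames, "indoors or outdoors?", kwargs)
        if model_name == AGE_LABEL1_NAME:        # 'age' classification
            return captions.generate_age_labels(filenames, kwargs)
        raise ValueError(f"Unknown labeling model: {model_name}")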

fastdup/engine.py

Lines changed: 4 additions & 3 deletions
@@ -83,7 +83,7 @@ def run(self,
    - image_filename: {Mandatory}. Relative path to the image w.r.t. input_dir
    - split: (Optional). 'train' or 'test'
    - label: (Optional). Class of the image
-   - bbox_x, bbox_y, bbox_h, bbox_w: (Optional). Bounding box of the object in the image
+   - row_y, col_x, height, width: (Optional). Bounding box of the object in the image
      if provided, fastdup will run on the bounding box instead of the whole image
    - x1, y1, x2, y2, x3, y3, x4, y4: (Optional). Bounding box of the object in the image
      if provided, and bounding_box=='rotated_bbox' fastdup will run on the rotated bounding box.

@@ -148,8 +148,9 @@ def run(self,
        verbose=verbose,
        license='' if license is None else license,
        high_accuracy=high_accuracy)
-   if model_path is not None:
-       assert 'd' in kwargs, 'Please provide d parameter to indicate the model output dimension'
+   if (model_path is not None):
+       if 'dinov2s' not in model_path and 'dinov2b' not in model_path:
+           assert 'd' in kwargs, 'Please provide d parameter to indicate the model output dimension'
        fastdup_func_params['model_path'] = model_path
    fastdup_func_params.update(kwargs)
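Together with the new DINOV2S_MODEL/DINOV2B_MODEL constants in definitions.py, the relaxed check above means the output dimension 'd' only has to be supplied for other custom ONNX models. A minimal sketch follows, assuming the fastdup.create()/run() API of this release; the directory names are illustrative.

    # Sketch only: directory names are made up; the DINOv2 URL comes from definitions.py.
    import fastdup
    from fastdup.definitions import DINOV2S_MODEL

    fd = fastdup.create(work_dir="work", input_dir="images")  # hypothetical directories
    # Bundled DINOv2 model: its embedding dimension (384) is known, so 'd' can be omitted.
    fd.run(model_path=DINOV2S_MODEL)
    # Any other custom ONNX model still needs the output dimension, e.g.:
    # fd.run(model_path="my_model.onnx", d=512)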