Skip to content

Commit c09dd95

Browse files
committed
add basic api usage
1 parent 93d08b5 commit c09dd95

File tree

1 file changed

+222
-96
lines changed

1 file changed

+222
-96
lines changed

examples/enrichment-ram-groundingdino-sam.ipynb

Lines changed: 222 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,101 @@
270270
"## Zero-Shot Classification with RAM and Tag2Text"
271271
]
272272
},
273+
{
274+
"cell_type": "code",
275+
"execution_count": 70,
276+
"id": "a6c6331c-179f-4614-b761-f6dcbadb724f",
277+
"metadata": {
278+
"tags": []
279+
},
280+
"outputs": [
281+
{
282+
"name": "stderr",
283+
"output_type": "stream",
284+
"text": [
285+
"INFO:fastdup.models.ram:Loading model checkpoint from - /home/dnth/ram_swin_large_14m.pth\n",
286+
"INFO:fastdup.models.ram:Model loaded to device - cuda\n"
287+
]
288+
}
289+
],
290+
"source": [
291+
"from fastdup.models_ram import RecognizeAnythingModel\n",
292+
"\n",
293+
"model = RecognizeAnythingModel()\n",
294+
"result = model.run_inference(\"coco_minitrain_25k/images/val2017/000000382734.jpg\")"
295+
]
296+
},
297+
{
298+
"cell_type": "code",
299+
"execution_count": 71,
300+
"id": "420c05ad-da3b-4b5f-b76e-3f4c49bba510",
301+
"metadata": {
302+
"tags": []
303+
},
304+
"outputs": [
305+
{
306+
"data": {
307+
"text/plain": [
308+
"'bath . bathroom . doorway . drain . floor . glass door . room . screen door . shower . white'"
309+
]
310+
},
311+
"execution_count": 71,
312+
"metadata": {},
313+
"output_type": "execute_result"
314+
}
315+
],
316+
"source": [
317+
"result"
318+
]
319+
},
320+
{
321+
"cell_type": "code",
322+
"execution_count": 73,
323+
"id": "c01bf8fa-21f6-4bee-b513-28ce3c839165",
324+
"metadata": {
325+
"tags": []
326+
},
327+
"outputs": [
328+
{
329+
"name": "stderr",
330+
"output_type": "stream",
331+
"text": [
332+
"INFO:fastdup.model.tag2text:Loading model checkpoint from - /home/dnth/tag2text_swin_14m.pth\n",
333+
"INFO:fastdup.model.tag2text:Model loaded to device - cuda\n"
334+
]
335+
}
336+
],
337+
"source": [
338+
"from fastdup.models_tag2text import Tag2TextModel\n",
339+
"model = Tag2TextModel()\n",
340+
"result = model.run_inference(\"coco_minitrain_25k/images/val2017/000000382734.jpg\")"
341+
]
342+
},
343+
{
344+
"cell_type": "code",
345+
"execution_count": 74,
346+
"id": "02f68243-bcdf-404c-8b22-742d1706d194",
347+
"metadata": {
348+
"tags": []
349+
},
350+
"outputs": [
351+
{
352+
"data": {
353+
"text/plain": [
354+
"('room | floor | bathroom | shower | wall | toilet | green | white',\n",
355+
" None,\n",
356+
" 'a bathroom with green walls and a white toilet')"
357+
]
358+
},
359+
"execution_count": 74,
360+
"metadata": {},
361+
"output_type": "execute_result"
362+
}
363+
],
364+
"source": [
365+
"result"
366+
]
367+
},
273368
{
274369
"cell_type": "code",
275370
"execution_count": 37,
@@ -612,6 +707,93 @@
612707
"## Zero-Shot Detection with Grounding DINO"
613708
]
614709
},
710+
{
711+
"cell_type": "code",
712+
"execution_count": 53,
713+
"id": "5cc1e1b8-799c-461f-b08e-a95590be9a60",
714+
"metadata": {
715+
"tags": []
716+
},
717+
"outputs": [
718+
{
719+
"name": "stderr",
720+
"output_type": "stream",
721+
"text": [
722+
"INFO:fastdup.models.grounding_dino:Loading model checkpoint from - /home/dnth/groundingdino_swint_ogc.pth\n"
723+
]
724+
},
725+
{
726+
"name": "stdout",
727+
"output_type": "stream",
728+
"text": [
729+
"final text_encoder_type: bert-base-uncased\n"
730+
]
731+
},
732+
{
733+
"name": "stderr",
734+
"output_type": "stream",
735+
"text": [
736+
"Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
737+
"- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
738+
"- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
739+
"INFO:fastdup.models.grounding_dino:Model loaded on device - cuda\n"
740+
]
741+
},
742+
{
743+
"name": "stdout",
744+
"output_type": "stream",
745+
"text": [
746+
"final text_encoder_type: bert-base-uncased\n"
747+
]
748+
}
749+
],
750+
"source": [
751+
"from fastdup.models_grounding_dino import GroundingDINO\n",
752+
"\n",
753+
"model = GroundingDINO()\n",
754+
"results = model.run_inference(image_path=\"coco_minitrain_25k/images/val2017/000000449996.jpg\",\n",
755+
" text_prompt=\"air field . airliner . plane . airport . airport runway . airport terminal . jet . land . park . raceway . sky . tarmac . terminal\",\n",
756+
" box_threshold=0.3,\n",
757+
" text_threshold=0.25)"
758+
]
759+
},
760+
{
761+
"cell_type": "code",
762+
"execution_count": 54,
763+
"id": "9fdd1d2f-4df4-422d-89a7-bbec9bf8a1dc",
764+
"metadata": {
765+
"tags": []
766+
},
767+
"outputs": [
768+
{
769+
"data": {
770+
"text/plain": [
771+
"{'labels': ['sky',\n",
772+
" 'airport terminal',\n",
773+
" 'plane',\n",
774+
" 'airliner',\n",
775+
" 'jet',\n",
776+
" 'jet',\n",
777+
" 'tarmac'],\n",
778+
" 'scores': [0.5286, 0.3451, 0.3822, 0.4872, 0.3853, 0.3502, 0.3026],\n",
779+
" 'boxes': [(1.47, 1.45, 638.46, 241.37),\n",
780+
" (329.38, 291.55, 468.1, 319.69),\n",
781+
" (142.03, 247.3, 261.96, 296.55),\n",
782+
" (443.6, 111.93, 495.47, 130.84),\n",
783+
" (113.85, 290.28, 246.55, 340.23),\n",
784+
" (391.59, 271.73, 465.1, 295.48),\n",
785+
" (2.35, 277.69, 637.63, 425.32)]}"
786+
]
787+
},
788+
"execution_count": 54,
789+
"metadata": {},
790+
"output_type": "execute_result"
791+
}
792+
],
793+
"source": [
794+
"results"
795+
]
796+
},
615797
{
616798
"cell_type": "code",
617799
"execution_count": 41,
@@ -903,6 +1085,46 @@
9031085
"## Zero-Shot Segmentation with SAM"
9041086
]
9051087
},
1088+
{
1089+
"cell_type": "markdown",
1090+
"id": "dc58c743-d8e3-45b8-ae32-7cfc6474afd1",
1091+
"metadata": {},
1092+
"source": [
1093+
"Run SAM on a single image with a single bounding box."
1094+
]
1095+
},
1096+
{
1097+
"cell_type": "code",
1098+
"execution_count": 68,
1099+
"id": "2eaaf4f7-9ff8-46f5-89b6-d9568be7b625",
1100+
"metadata": {
1101+
"tags": []
1102+
},
1103+
"outputs": [
1104+
{
1105+
"name": "stderr",
1106+
"output_type": "stream",
1107+
"text": [
1108+
"INFO:fastdup.model.sam:Loading model checkpoint from - /home/dnth/sam_vit_h_4b8939.pth\n"
1109+
]
1110+
}
1111+
],
1112+
"source": [
1113+
"from fastdup.models_sam import SegmentAnythingModel\n",
1114+
"import torch\n",
1115+
"\n",
1116+
"model = SegmentAnythingModel()\n",
1117+
"result = model.run_inference(image_path=\"coco_minitrain_25k/images/val2017/000000449996.jpg\", bboxes=torch.tensor((1.47, 1.45, 638.46, 241.37)))"
1118+
]
1119+
},
1120+
{
1121+
"cell_type": "markdown",
1122+
"id": "d137da5d-ac81-4af1-9b5a-1b4d7b79464d",
1123+
"metadata": {},
1124+
"source": [
1125+
"Run SAM on multiple images with multiple bounding boxes stored in a DataFrame."
1126+
]
1127+
},
9061128
{
9071129
"cell_type": "code",
9081130
"execution_count": 45,
@@ -946,78 +1168,6 @@
9461168
"plot_annotations(df, image_col='filename', tags_col='ram_tags', bbox_col='grounding_dino_bboxes', scores_col='grounding_dino_scores', labels_col='grounding_dino_labels', masks_col='sam_masks')"
9471169
]
9481170
},
949-
{
950-
"cell_type": "code",
951-
"execution_count": null,
952-
"id": "cdfccccf-2b7d-47e1-bc77-c759bee7177c",
953-
"metadata": {},
954-
"outputs": [],
955-
"source": []
956-
},
957-
{
958-
"cell_type": "code",
959-
"execution_count": null,
960-
"id": "47f2cb08-7bf5-4deb-bf97-0b80f8072f94",
961-
"metadata": {},
962-
"outputs": [],
963-
"source": []
964-
},
965-
{
966-
"cell_type": "code",
967-
"execution_count": 30,
968-
"id": "70eff0bb-77d7-4f87-86d9-9d7d78888181",
969-
"metadata": {
970-
"tags": []
971-
},
972-
"outputs": [],
973-
"source": [
974-
"from fastdup.models_grounding_dino import GroundingDINO"
975-
]
976-
},
977-
{
978-
"cell_type": "code",
979-
"execution_count": 31,
980-
"id": "b9937a36-42e9-4b07-81ec-7f1786bc9420",
981-
"metadata": {
982-
"tags": []
983-
},
984-
"outputs": [],
985-
"source": [
986-
"from fastdup.models_ram import RecognizeAnythingModel"
987-
]
988-
},
989-
{
990-
"cell_type": "code",
991-
"execution_count": 32,
992-
"id": "1cc05bd1-d2b3-45a9-be7d-0978b5e1e0ca",
993-
"metadata": {
994-
"tags": []
995-
},
996-
"outputs": [],
997-
"source": [
998-
"from fastdup.models_tag2text import Tag2TextModel"
999-
]
1000-
},
1001-
{
1002-
"cell_type": "code",
1003-
"execution_count": 33,
1004-
"id": "8679b319-a659-48ec-8338-dbc917877bd4",
1005-
"metadata": {
1006-
"tags": []
1007-
},
1008-
"outputs": [],
1009-
"source": [
1010-
"from fastdup.models_sam import SegmentAnythingModel"
1011-
]
1012-
},
1013-
{
1014-
"cell_type": "code",
1015-
"execution_count": null,
1016-
"id": "f0312455-26ba-451c-b62a-ccfe7fabf80b",
1017-
"metadata": {},
1018-
"outputs": [],
1019-
"source": []
1020-
},
10211171
{
10221172
"cell_type": "markdown",
10231173
"id": "0f4b137a-25ea-44ab-b44e-ed734428de86",
@@ -1039,30 +1189,6 @@
10391189
"convert_to_coco_format(df, bbox_col='grounding_dino_bboxes', label_col='grounding_dino_labels', json_filename='grounding_dino_annot_coco_format.json')"
10401190
]
10411191
},
1042-
{
1043-
"cell_type": "code",
1044-
"execution_count": null,
1045-
"id": "0b9f42d3-ef18-4e3d-b981-c624bb2b52a9",
1046-
"metadata": {},
1047-
"outputs": [],
1048-
"source": []
1049-
},
1050-
{
1051-
"cell_type": "code",
1052-
"execution_count": null,
1053-
"id": "d99d5c9b-1f4a-4b3b-b534-41447de54a89",
1054-
"metadata": {},
1055-
"outputs": [],
1056-
"source": []
1057-
},
1058-
{
1059-
"cell_type": "code",
1060-
"execution_count": null,
1061-
"id": "53d98947-60a3-4ff6-a900-3bd154a45302",
1062-
"metadata": {},
1063-
"outputs": [],
1064-
"source": []
1065-
},
10661192
{
10671193
"cell_type": "markdown",
10681194
"id": "4bf9291f-c022-44f6-be5b-6bbb798f0c55",

0 commit comments

Comments
 (0)