Skip to content

Commit a79b5f4

Browse files
authored
Add image-text-to-text + multimodal fixes (#477)
Tentatively: * We better align on what counts as multimodal * We add a new type for LLaVA
1 parent 187f221 commit a79b5f4

File tree

4 files changed

+46
-8
lines changed

4 files changed

+46
-8
lines changed

packages/tasks/src/pipelines.ts

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ export const PIPELINE_DATA = {
238238
},
239239
"feature-extraction": {
240240
name: "Feature Extraction",
241-
modality: "multimodal",
241+
modality: "nlp",
242242
color: "red",
243243
},
244244
"text-generation": {
@@ -419,7 +419,7 @@ export const PIPELINE_DATA = {
419419
},
420420
"text-to-image": {
421421
name: "Text-to-Image",
422-
modality: "multimodal",
422+
modality: "cv",
423423
color: "yellow",
424424
},
425425
"image-to-text": {
@@ -430,7 +430,7 @@ export const PIPELINE_DATA = {
430430
name: "Image Captioning",
431431
},
432432
],
433-
modality: "multimodal",
433+
modality: "cv",
434434
color: "red",
435435
},
436436
"image-to-image": {
@@ -454,7 +454,7 @@ export const PIPELINE_DATA = {
454454
},
455455
"image-to-video": {
456456
name: "Image-to-Video",
457-
modality: "multimodal",
457+
modality: "cv",
458458
color: "indigo",
459459
},
460460
"unconditional-image-generation": {
@@ -589,9 +589,15 @@ export const PIPELINE_DATA = {
589589
},
590590
"text-to-video": {
591591
name: "Text-to-Video",
592-
modality: "multimodal",
592+
modality: "cv",
593593
color: "green",
594594
},
595+
"image-text-to-text": {
596+
name: "Image + Text to Text (VLLMs)",
597+
modality: "multimodal",
598+
color: "red",
599+
hideInDatasets: true,
600+
},
595601
"visual-question-answering": {
596602
name: "Visual Question Answering",
597603
subtasks: [
@@ -622,7 +628,7 @@ export const PIPELINE_DATA = {
622628
},
623629
"graph-ml": {
624630
name: "Graph Machine Learning",
625-
modality: "multimodal",
631+
modality: "other",
626632
color: "green",
627633
},
628634
"mask-generation": {
@@ -637,12 +643,12 @@ export const PIPELINE_DATA = {
637643
},
638644
"text-to-3d": {
639645
name: "Text-to-3D",
640-
modality: "multimodal",
646+
modality: "cv",
641647
color: "yellow",
642648
},
643649
"image-to-3d": {
644650
name: "Image-to-3D",
645-
modality: "multimodal",
651+
modality: "cv",
646652
color: "green",
647653
},
648654
other: {

packages/tasks/src/tasks/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
5353
"graph-ml": ["transformers"],
5454
"image-classification": ["keras", "timm", "transformers", "transformers.js"],
5555
"image-segmentation": ["transformers", "transformers.js"],
56+
"image-text-to-text": ["transformers"],
5657
"image-to-image": ["diffusers", "transformers", "transformers.js"],
5758
"image-to-text": ["transformers", "transformers.js"],
5859
"image-to-video": ["diffusers"],
@@ -130,6 +131,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
130131
"graph-ml": undefined,
131132
"image-classification": getData("image-classification", imageClassification),
132133
"image-segmentation": getData("image-segmentation", imageSegmentation),
134+
"image-text-to-text": undefined,
133135
"image-to-image": getData("image-to-image", imageToImage),
134136
"image-to-text": getData("image-to-text", imageToText),
135137
"image-to-video": undefined,
packages/widgets/src/lib/components/Icons/IconImageAndTextToText.svelte

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<script lang="ts">
2+
export let classNames = "";
3+
</script>
4+
5+
<svg
6+
class={classNames}
7+
xmlns="http://www.w3.org/2000/svg"
8+
xmlns:xlink="http://www.w3.org/1999/xlink"
9+
aria-hidden="true"
10+
role="img"
11+
width="1em"
12+
height="1em"
13+
preserveAspectRatio="xMidYMid meet"
14+
viewBox="0 0 32 32"
15+
>
16+
<path
17+
d="M29.707 19.293l-3-3a1 1 0 0 0-1.414 0L16 25.586V30h4.414l9.293-9.293a1 1 0 0 0 0-1.414zM19.586 28H18v-1.586l5-5L24.586 23zM26 21.586L24.414 20L26 18.414L27.586 20z"
18+
fill="currentColor"
19+
/>
20+
<path
21+
d="M20 13v-2h-2.142a3.94 3.94 0 0 0-.425-1.019l1.517-1.517l-1.414-1.414l-1.517 1.517A3.944 3.944 0 0 0 15 8.142V6h-2v2.142a3.944 3.944 0 0 0-1.019.425L10.464 7.05L9.05 8.464l1.517 1.517A3.94 3.94 0 0 0 10.142 11H8v2h2.142a3.94 3.94 0 0 0 .425 1.019L9.05 15.536l1.414 1.414l1.517-1.517a3.944 3.944 0 0 0 1.019.425V18h2v-2.142a3.944 3.944 0 0 0 1.019-.425l1.517 1.517l1.414-1.414l-1.517-1.517A3.94 3.94 0 0 0 17.858 13zm-6 1a2 2 0 1 1 2-2a2.002 2.002 0 0 1-2 2z"
22+
fill="currentColor"
23+
/>
24+
<path
25+
d="M12 30H6a2.002 2.002 0 0 1-2-2V4a2.002 2.002 0 0 1 2-2h16a2.002 2.002 0 0 1 2 2v10h-2V4H6v24h6z"
26+
fill="currentColor"
27+
/>
28+
</svg>

packages/widgets/src/lib/components/PipelineIcon/PipelineIcon.svelte

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import IconConversational from "../Icons/IconConversational.svelte";
88
import IconFeatureExtraction from "../Icons/IconFeatureExtraction.svelte";
99
import IconFillMask from "../Icons/IconFillMask.svelte";
10+
import IconImageAndTextToText from "../Icons/IconImageAndTextToText.svelte";
1011
import IconImageClassification from "../Icons/IconImageClassification.svelte";
1112
import IconImageSegmentation from "../Icons/IconImageSegmentation.svelte";
1213
import IconObjectDetection from "../Icons/IconObjectDetection.svelte";
@@ -75,6 +76,7 @@
7576
"video-classification": IconVideoClassification,
7677
"image-segmentation": IconImageSegmentation,
7778
"text-to-image": IconTextToImage,
79+
"image-text-to-text": IconImageAndTextToText,
7880
"image-to-text": IconImageToText,
7981
"image-to-image": IconImageToImage,
8082
"image-to-video": IconImageToVideo,

0 commit comments

Comments
 (0)