@@ -27,7 +27,129 @@ public function __construct(
27
27
) {
28
28
}
29
29
30
- public function vectorizeEmbeddableDocuments (array $ documents , array $ options = []): array
30
/**
 * Single entry point for vectorization: dispatches on the shape of $values.
 *
 * - string or \Stringable            => one Vector
 * - EmbeddableDocumentInterface      => one VectorDocument
 * - empty array                      => empty array
 * - array starting with a document   => validated, then vectorized as documents
 * - array starting with a string or
 *   \Stringable                      => validated, then vectorized as strings
 *
 * For arrays the first element picks the mode; validateArray() then rejects
 * any element that does not match it.
 *
 * @param array<string, mixed> $options forwarded to the delegated method
 *
 * @throws RuntimeException when the first array element is neither a string,
 *                          a \Stringable nor an EmbeddableDocumentInterface
 */
public function vectorize(string|\Stringable|EmbeddableDocumentInterface|array $values, array $options = []): Vector|VectorDocument|array
{
    if (\is_array($values)) {
        if ([] === $values) {
            return [];
        }

        $head = $values[array_key_first($values)];

        // Documents take precedence over strings when the first element is both.
        $holdsDocuments = $head instanceof EmbeddableDocumentInterface;
        $holdsStrings = !$holdsDocuments && (\is_string($head) || $head instanceof \Stringable);

        if (!$holdsDocuments && !$holdsStrings) {
            throw new RuntimeException('Array must contain only strings, Stringable objects, or EmbeddableDocumentInterface instances. ');
        }

        $this->validateArray($values, $holdsDocuments ? EmbeddableDocumentInterface::class : 'string|stringable ');

        return $holdsDocuments
            ? $this->vectorizeEmbeddableDocuments($values, $options)
            : $this->vectorizeStrings($values, $options);
    }

    // Arm order matters: a value that is both \Stringable and a document is treated as a string.
    return match (true) {
        \is_string($values), $values instanceof \Stringable => $this->vectorizeString($values, $options),
        $values instanceof EmbeddableDocumentInterface => $this->vectorizeEmbeddableDocument($values, $options),
    };
}
59
+
60
/**
 * Ensures every element of $values matches $expectedType.
 *
 * $expectedType is either the sentinel 'string|stringable ' (each element must
 * be a string or a \Stringable) or a class/interface name checked with
 * instanceof.
 *
 * @param array<mixed> $values
 *
 * @throws RuntimeException on the first element that does not match
 */
private function validateArray(array $values, string $expectedType): void
{
    $expectsStrings = 'string|stringable ' === $expectedType;

    foreach ($values as $item) {
        if ($expectsStrings) {
            if (\is_string($item) || $item instanceof \Stringable) {
                continue;
            }

            throw new RuntimeException('Array must contain only strings or Stringable objects. ');
        }

        if ($item instanceof $expectedType) {
            continue;
        }

        throw new RuntimeException(\sprintf('Array must contain only "%s" instances. ', $expectedType));
    }
}
75
+
76
/**
 * Casts the input to a string, invokes the platform with the configured model
 * and returns the first vector of the result.
 *
 * @param array<string, mixed> $options passed through to the platform call
 *
 * @throws RuntimeException when the result holds no vector at index 0
 */
private function vectorizeString(string|\Stringable $string, array $options = []): Vector
{
    $text = (string) $string;
    $this->logger->debug('Vectorizing string ', ['string ' => $text]);

    $embeddings = $this->platform->invoke($this->model, $text, $options)->asVectors();

    return $embeddings[0] ?? throw new RuntimeException('No vector returned for string vectorization. ');
}
93
+
94
/**
 * Vectorizes the content of one document and wraps the resulting vector in a
 * VectorDocument that carries the document's id and metadata.
 *
 * @param array<string, mixed> $options passed through to vectorizeString()
 */
private function vectorizeEmbeddableDocument(EmbeddableDocumentInterface $document, array $options = []): VectorDocument
{
    $logContext = ['document_id ' => $document->getId()];
    $this->logger->debug('Vectorizing embeddable document ', $logContext);

    $embedding = $this->vectorizeString($document->getContent(), $options);

    return new VectorDocument(
        $document->getId(),
        $embedding,
        $document->getMetadata(),
    );
}
105
+
106
/**
 * Vectorizes a list of strings, batching when the model allows it.
 *
 * If the model supports Capability::INPUT_MULTIPLE, all strings are sent in a
 * single platform call. Otherwise one call is issued per string and the
 * vectors of all results are concatenated in input order.
 *
 * @param array<string|\Stringable> $strings
 * @param array<string, mixed>      $options passed through to every platform call
 *
 * @return array<Vector>
 */
private function vectorizeStrings(array $strings, array $options = []): array
{
    $stringCount = \count($strings);
    $this->logger->info('Starting vectorization of strings ', ['string_count ' => $stringCount]);

    // Convert all values to strings
    $stringValues = array_map(static fn (string|\Stringable $s) => (string) $s, $strings);

    if ($this->platform->getModelCatalog()->getModel($this->model)->supports(Capability::INPUT_MULTIPLE)) {
        $this->logger->debug('Using batch vectorization with model that supports multiple inputs ');
        $result = $this->platform->invoke($this->model, $stringValues, $options);

        $vectors = $result->asVectors();
        $this->logger->debug('Batch vectorization completed ', ['vector_count ' => \count($vectors)]);
    } else {
        $this->logger->debug('Using sequential vectorization for model without multiple input support ');

        // First pass: issue every platform call before reading any result.
        $results = [];
        foreach ($stringValues as $i => $string) {
            $this->logger->debug('Vectorizing string ', ['string_index ' => $i]);
            $results[] = $this->platform->invoke($this->model, $string, $options);
        }

        // Second pass: resolve the results in order and merge them once.
        // Calling array_merge() inside a loop copies the growing accumulator
        // on every iteration; a single variadic call yields the same array.
        $vectors = array_merge(...array_map(static fn ($result) => $result->asVectors(), $results));
        $this->logger->debug('Sequential vectorization completed ', ['vector_count ' => \count($vectors)]);
    }

    $this->logger->info('Vectorization process completed ', ['string_count ' => $stringCount, 'vector_count ' => \count($vectors)]);

    return $vectors;
}
145
+
146
+ /**
147
+ * @param array<EmbeddableDocumentInterface> $documents
148
+ * @param array<string, mixed> $options
149
+ *
150
+ * @return array<VectorDocument>
151
+ */
152
+ private function vectorizeEmbeddableDocuments (array $ documents , array $ options = []): array
31
153
{
32
154
$ documentCount = \count ($ documents );
33
155
$ this ->logger ->info ('Starting vectorization process ' , ['document_count ' => $ documentCount ]);
@@ -65,18 +187,4 @@ public function vectorizeEmbeddableDocuments(array $documents, array $options =
65
187
66
188
return $ vectorDocuments ;
67
189
}
68
-
69
- public function vectorize (string |\Stringable $ string , array $ options = []): Vector
70
- {
71
- $ this ->logger ->debug ('Vectorizing string ' , ['string ' => (string ) $ string ]);
72
-
73
- $ result = $ this ->platform ->invoke ($ this ->model , (string ) $ string , $ options );
74
- $ vectors = $ result ->asVectors ();
75
-
76
- if (!isset ($ vectors [0 ])) {
77
- throw new RuntimeException ('No vector returned for string vectorization. ' );
78
- }
79
-
80
- return $ vectors [0 ];
81
- }
82
190
}
0 commit comments