Skip to content

Commit 4d6d3be

Browse files
committed
Fix android and ios usages of image
1 parent 78fa105 commit 4d6d3be

File tree

3 files changed

+65
-82
lines changed

3 files changed

+65
-82
lines changed

extension/android/jni/jni_layer_llama.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
268268
for (int i = 0; i < image_size; i++) {
269269
image_data[i] = image_data_jint[i];
270270
}
271-
llm::Image image_runner{image_data, width, height, channels};
271+
llm::Image image_runner{std::move(image_data), width, height, channels};
272272
prefill_inputs_.emplace_back(
273273
llm::MultimodalInput{std::move(image_runner)});
274274
}

extension/llm/apple/ExecuTorchLLM/Exported/ExecuTorchLLMMultimodalRunner.mm

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,12 @@ - (BOOL)generate:(NSArray<ExecuTorchLLMMultimodalInput *> *)inputs
172172
case ExecuTorchLLMMultimodalInputTypeImage: {
173173
ExecuTorchLLMImage *image = input.image;
174174
std::vector<uint8_t> data((uint8_t *)image.data.bytes, (uint8_t *)image.data.bytes + image.data.length);
175-
nativeInputs.emplace_back(llm::MultimodalInput(llm::Image{
176-
.data = std::move(data),
177-
.width = (int32_t)image.width,
178-
.height = (int32_t)image.height,
179-
.channels = (int32_t)image.channels
180-
}));
175+
nativeInputs.emplace_back(llm::MultimodalInput(llm::Image(
176+
std::move(data),
177+
(int32_t)image.width,
178+
(int32_t)image.height,
179+
(int32_t)image.channels
180+
)));
181181
break;
182182
}
183183
default: {

extension/llm/runner/test/test_multimodal_input.cpp

Lines changed: 58 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ using executorch::extension::llm::make_image_input;
1616
using executorch::extension::llm::make_text_input;
1717
using executorch::extension::llm::MultimodalInput;
1818

19-
namespace {
2019
class MultimodalInputTest : public Test {
2120
protected:
2221
std::string createTestText() {
@@ -28,21 +27,13 @@ class MultimodalInputTest : public Test {
2827
}
2928

3029
Image createTestImage() {
31-
Image img;
32-
img.width = 224;
33-
img.height = 224;
34-
img.channels = 3;
35-
img.data = std::vector<uint8_t>(224 * 224 * 3, 128); // Fill with gray
36-
return img;
30+
std::vector<uint8_t> data(224 * 224 * 3, 128); // Fill with gray
31+
return Image(std::move(data), 224, 224, 3);
3732
}
3833

3934
Image createTestImageSmall() {
40-
Image img;
41-
img.width = 32;
42-
img.height = 32;
43-
img.channels = 1;
44-
img.data = std::vector<uint8_t>(32 * 32, 255); // Fill with white
45-
return img;
35+
std::vector<uint8_t> data(32 * 32, 255); // Fill with white
36+
return Image(std::move(data), 32, 32, 1);
4637
}
4738
};
4839

@@ -76,28 +67,28 @@ TEST_F(MultimodalInputTest, ImageConstructorFromImage) {
7667
EXPECT_FALSE(input.is_text());
7768
EXPECT_TRUE(input.is_image());
7869
EXPECT_EQ(input.get_type(), MultimodalInput::Type::IMAGE);
79-
EXPECT_EQ(input.get_image().width, 224);
80-
EXPECT_EQ(input.get_image().height, 224);
81-
EXPECT_EQ(input.get_image().channels, 3);
82-
EXPECT_EQ(input.get_image().data.size(), 224 * 224 * 3);
70+
EXPECT_EQ(input.get_image().width(), 224);
71+
EXPECT_EQ(input.get_image().height(), 224);
72+
EXPECT_EQ(input.get_image().channels(), 3);
73+
EXPECT_EQ(input.get_image().get_uint8_data().size(), 224 * 224 * 3);
8374
}
8475

8576
TEST_F(MultimodalInputTest, ImageConstructorFromRvalueImage) {
8677
Image img = createTestImage();
87-
int width = img.width;
88-
int height = img.height;
89-
int channels = img.channels;
90-
size_t data_size = img.data.size();
78+
int width = img.width();
79+
int height = img.height();
80+
int channels = img.channels();
81+
size_t data_size = img.get_uint8_data().size();
9182

9283
MultimodalInput input(std::move(img));
9384

9485
EXPECT_FALSE(input.is_text());
9586
EXPECT_TRUE(input.is_image());
9687
EXPECT_EQ(input.get_type(), MultimodalInput::Type::IMAGE);
97-
EXPECT_EQ(input.get_image().width, width);
98-
EXPECT_EQ(input.get_image().height, height);
99-
EXPECT_EQ(input.get_image().channels, channels);
100-
EXPECT_EQ(input.get_image().data.size(), data_size);
88+
EXPECT_EQ(input.get_image().width(), width);
89+
EXPECT_EQ(input.get_image().height(), height);
90+
EXPECT_EQ(input.get_image().channels(), channels);
91+
EXPECT_EQ(input.get_image().get_uint8_data().size(), data_size);
10192
}
10293

10394
// Test copy constructor and assignment
@@ -129,10 +120,10 @@ TEST_F(MultimodalInputTest, CopyConstructorImage) {
129120
MultimodalInput copy(original);
130121

131122
EXPECT_TRUE(copy.is_image());
132-
EXPECT_EQ(copy.get_image().width, 224);
133-
EXPECT_EQ(copy.get_image().height, 224);
134-
EXPECT_EQ(copy.get_image().channels, 3);
135-
EXPECT_EQ(original.get_image().width, 224); // Original should be unchanged
123+
EXPECT_EQ(copy.get_image().width(), 224);
124+
EXPECT_EQ(copy.get_image().height(), 224);
125+
EXPECT_EQ(copy.get_image().channels(), 3);
126+
EXPECT_EQ(original.get_image().width(), 224); // Original should be unchanged
136127
}
137128

138129
TEST_F(MultimodalInputTest, CopyAssignmentImage) {
@@ -143,10 +134,10 @@ TEST_F(MultimodalInputTest, CopyAssignmentImage) {
143134
copy = original;
144135

145136
EXPECT_TRUE(copy.is_image());
146-
EXPECT_EQ(copy.get_image().width, 224);
147-
EXPECT_EQ(copy.get_image().height, 224);
148-
EXPECT_EQ(copy.get_image().channels, 3);
149-
EXPECT_EQ(original.get_image().width, 224); // Original should be unchanged
137+
EXPECT_EQ(copy.get_image().width(), 224);
138+
EXPECT_EQ(copy.get_image().height(), 224);
139+
EXPECT_EQ(copy.get_image().channels(), 3);
140+
EXPECT_EQ(original.get_image().width(), 224); // Original should be unchanged
150141
}
151142

152143
// Test move constructor and assignment
@@ -174,32 +165,32 @@ TEST_F(MultimodalInputTest, MoveAssignmentText) {
174165

175166
TEST_F(MultimodalInputTest, MoveConstructorImage) {
176167
Image img = createTestImage();
177-
int width = img.width;
178-
int height = img.height;
179-
int channels = img.channels;
168+
int width = img.width();
169+
int height = img.height();
170+
int channels = img.channels();
180171
MultimodalInput original(std::move(img));
181172
MultimodalInput moved(std::move(original));
182173

183174
EXPECT_TRUE(moved.is_image());
184-
EXPECT_EQ(moved.get_image().width, width);
185-
EXPECT_EQ(moved.get_image().height, height);
186-
EXPECT_EQ(moved.get_image().channels, channels);
175+
EXPECT_EQ(moved.get_image().width(), width);
176+
EXPECT_EQ(moved.get_image().height(), height);
177+
EXPECT_EQ(moved.get_image().channels(), channels);
187178
}
188179

189180
TEST_F(MultimodalInputTest, MoveAssignmentImage) {
190181
Image img = createTestImage();
191-
int width = img.width;
192-
int height = img.height;
193-
int channels = img.channels;
182+
int width = img.width();
183+
int height = img.height();
184+
int channels = img.channels();
194185
MultimodalInput original(std::move(img));
195186
MultimodalInput moved(createTestText()); // Start with different type
196187

197188
moved = std::move(original);
198189

199190
EXPECT_TRUE(moved.is_image());
200-
EXPECT_EQ(moved.get_image().width, width);
201-
EXPECT_EQ(moved.get_image().height, height);
202-
EXPECT_EQ(moved.get_image().channels, channels);
191+
EXPECT_EQ(moved.get_image().width(), width);
192+
EXPECT_EQ(moved.get_image().height(), height);
193+
EXPECT_EQ(moved.get_image().channels(), channels);
203194
}
204195

205196
// Test getter methods with correct types
@@ -227,16 +218,13 @@ TEST_F(MultimodalInputTest, GetImageWithImageInput) {
227218

228219
// Test const lvalue reference version
229220
const MultimodalInput& const_input = input;
230-
EXPECT_EQ(const_input.get_image().width, 224);
231-
232-
// Test mutable lvalue reference version
233-
Image& mutable_image = input.get_image();
234-
mutable_image.width = 448;
235-
EXPECT_EQ(input.get_image().width, 448);
221+
EXPECT_EQ(const_input.get_image().width(), 224);
222+
EXPECT_EQ(const_input.get_image().height(), 224);
223+
EXPECT_EQ(const_input.get_image().channels(), 3);
236224

237225
// Test rvalue reference version
238226
Image moved_image = std::move(input).get_image();
239-
EXPECT_EQ(moved_image.width, 448);
227+
EXPECT_EQ(moved_image.width(), 224);
240228
}
241229

242230
// Test getter methods with wrong types (should throw)
@@ -296,18 +284,14 @@ TEST_F(MultimodalInputTest, TryGetImageWithImageInput) {
296284
const MultimodalInput& const_input = input;
297285
const Image* image_ptr = const_input.try_get_image();
298286
ASSERT_NE(image_ptr, nullptr);
299-
EXPECT_EQ(image_ptr->width, 224);
300-
EXPECT_EQ(image_ptr->height, 224);
301-
EXPECT_EQ(image_ptr->channels, 3);
287+
EXPECT_EQ(image_ptr->width(), 224);
288+
EXPECT_EQ(image_ptr->height(), 224);
289+
EXPECT_EQ(image_ptr->channels(), 3);
302290

303291
// Test mutable version
304292
Image* mutable_image_ptr = input.try_get_image();
305293
ASSERT_NE(mutable_image_ptr, nullptr);
306-
EXPECT_EQ(mutable_image_ptr->width, 224);
307-
308-
// Modify through pointer
309-
mutable_image_ptr->width = 448;
310-
EXPECT_EQ(input.get_image().width, 448);
294+
EXPECT_EQ(mutable_image_ptr->width(), 224);
311295
}
312296

313297
TEST_F(MultimodalInputTest, TryGetImageWithTextInput) {
@@ -344,22 +328,22 @@ TEST_F(MultimodalInputTest, MakeImageInputFromImage) {
344328
MultimodalInput input = make_image_input(img);
345329

346330
EXPECT_TRUE(input.is_image());
347-
EXPECT_EQ(input.get_image().width, 224);
348-
EXPECT_EQ(input.get_image().height, 224);
349-
EXPECT_EQ(input.get_image().channels, 3);
331+
EXPECT_EQ(input.get_image().width(), 224);
332+
EXPECT_EQ(input.get_image().height(), 224);
333+
EXPECT_EQ(input.get_image().channels(), 3);
350334
}
351335

352336
TEST_F(MultimodalInputTest, MakeImageInputFromRvalueImage) {
353337
Image img = createTestImage();
354-
int width = img.width;
355-
int height = img.height;
356-
int channels = img.channels;
338+
int width = img.width();
339+
int height = img.height();
340+
int channels = img.channels();
357341
MultimodalInput input = make_image_input(std::move(img));
358342

359343
EXPECT_TRUE(input.is_image());
360-
EXPECT_EQ(input.get_image().width, width);
361-
EXPECT_EQ(input.get_image().height, height);
362-
EXPECT_EQ(input.get_image().channels, channels);
344+
EXPECT_EQ(input.get_image().width(), width);
345+
EXPECT_EQ(input.get_image().height(), height);
346+
EXPECT_EQ(input.get_image().channels(), channels);
363347
}
364348

365349
// Test with different image sizes
@@ -368,10 +352,10 @@ TEST_F(MultimodalInputTest, DifferentImageSizes) {
368352
MultimodalInput input(small_img);
369353

370354
EXPECT_TRUE(input.is_image());
371-
EXPECT_EQ(input.get_image().width, 32);
372-
EXPECT_EQ(input.get_image().height, 32);
373-
EXPECT_EQ(input.get_image().channels, 1);
374-
EXPECT_EQ(input.get_image().data.size(), 32 * 32);
355+
EXPECT_EQ(input.get_image().width(), 32);
356+
EXPECT_EQ(input.get_image().height(), 32);
357+
EXPECT_EQ(input.get_image().channels(), 1);
358+
EXPECT_EQ(input.get_image().get_uint8_data().size(), 32 * 32);
375359
}
376360

377361
// Test with empty text
@@ -424,11 +408,10 @@ TEST_F(MultimodalInputTest, AssignmentBetweenTypes) {
424408
// Assign image to text input
425409
input = MultimodalInput(img);
426410
EXPECT_TRUE(input.is_image());
427-
EXPECT_EQ(input.get_image().width, 224);
411+
EXPECT_EQ(input.get_image().width(), 224);
428412

429413
// Assign text back to image input
430414
input = MultimodalInput(text);
431415
EXPECT_TRUE(input.is_text());
432416
EXPECT_EQ(input.get_text(), text);
433417
}
434-
} // namespace

0 commit comments

Comments
 (0)