Skip to content

Commit 93121f1

Browse files
ds-filipknefel and Filip Knefel authored
fix(embedding): embedding batches overwriting one another (#568)
Fix a bug where embeddings were either incorrectly assigned to elements or missing when the number of elements was larger than the embedding batch size. --------- Co-authored-by: Filip Knefel <[email protected]>
1 parent 08ae930 commit 93121f1

File tree

5 files changed

+174
-11
lines changed

5 files changed

+174
-11
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 1.2.1
2+
3+
* **Fix**: Embeddings are properly assigned when embedding in batches
4+
15
## 1.2.0
26
* **Drop Python 3.9 support**
37

test/integration/embedders/assets/DA-1p-with-duplicate-pages.pdf.json

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,160 @@
296296
"orig_elements": "eJxNUsty2zAM/BWMzrZrO3bl9tY61/bkWybj4QOUOKZIhQ+rnkz+vUs5bXKRQGCxWCz59Nqw44F9PlvdfKemlXu53u2k2uwkf23Xe7lv2ayF3B9MK9WuWVAzcBZaZAH8a1ODcwolKq7nN9SNdezFUM/N44/lZlxONvdLXUZnlci8HEXHaTVq07yj822c0WK8Q2zwX97LTviuVDzqTw37rnlGtjKcfRkkR+Qf5kz82GIrtvxtvzY7JZml1K2RfDCbrW5Zb9XDoakyM//JFXzqhb+Y4txtQUKGwfpZQCIwUsRnRaee6Wijcky9SAQD+qATmRBpYtbWdxRKJmQT09SHuTUH/DNFmy4zUvMQvFU0hpQ4JYzAPK8pKeEzGZ5oqHvSFIrT1NkrUxnByTaSiQxi6xxYKRU52HyPVE+CZABN9RjxPGVFPyEneMWY8HkpsolUZFyCXhA4ZkodECaSnGbSe52GUDc/wgirOdYVc491YLLFcumlCDgQDGUeRidigjNQDL89XXyYfKUyAvSiGlON8/cGQQlssPKTrlkMtPmQsVQcYdr/ibX5iO4cb5TFBQ7VjCxAwMZZwsf14PRLdJauDHSC7lCSu5H1Gve0qg/q31v7LWLE7Cuf6jt4e/4L7isM5g=="
297297
}
298298
},
299+
{
300+
"type": "CompositeElement",
301+
"element_id": "3e7327ee201e84f3061474204708d8f7",
302+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
303+
"metadata": {
304+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
305+
"filetype": "application/json",
306+
"languages": [
307+
"eng"
308+
],
309+
"page_number": 3,
310+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
311+
}
312+
},
313+
{
314+
"type": "CompositeElement",
315+
"element_id": "3e7327ee201e84f3061474204708d8f7",
316+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
317+
"metadata": {
318+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
319+
"filetype": "application/json",
320+
"languages": [
321+
"eng"
322+
],
323+
"page_number": 3,
324+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
325+
}
326+
},
327+
{
328+
"type": "CompositeElement",
329+
"element_id": "3e7327ee201e84f3061474204708d8f7",
330+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
331+
"metadata": {
332+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
333+
"filetype": "application/json",
334+
"languages": [
335+
"eng"
336+
],
337+
"page_number": 3,
338+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
339+
}
340+
},
341+
{
342+
"type": "CompositeElement",
343+
"element_id": "3e7327ee201e84f3061474204708d8f7",
344+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
345+
"metadata": {
346+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
347+
"filetype": "application/json",
348+
"languages": [
349+
"eng"
350+
],
351+
"page_number": 3,
352+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
353+
}
354+
},
355+
{
356+
"type": "CompositeElement",
357+
"element_id": "3e7327ee201e84f3061474204708d8f7",
358+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
359+
"metadata": {
360+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
361+
"filetype": "application/json",
362+
"languages": [
363+
"eng"
364+
],
365+
"page_number": 3,
366+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
367+
}
368+
},
369+
{
370+
"type": "CompositeElement",
371+
"element_id": "3e7327ee201e84f3061474204708d8f7",
372+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
373+
"metadata": {
374+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
375+
"filetype": "application/json",
376+
"languages": [
377+
"eng"
378+
],
379+
"page_number": 3,
380+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
381+
}
382+
},
383+
{
384+
"type": "CompositeElement",
385+
"element_id": "3e7327ee201e84f3061474204708d8f7",
386+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
387+
"metadata": {
388+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
389+
"filetype": "application/json",
390+
"languages": [
391+
"eng"
392+
],
393+
"page_number": 3,
394+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
395+
}
396+
},
397+
{
398+
"type": "CompositeElement",
399+
"element_id": "3e7327ee201e84f3061474204708d8f7",
400+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
401+
"metadata": {
402+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
403+
"filetype": "application/json",
404+
"languages": [
405+
"eng"
406+
],
407+
"page_number": 3,
408+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
409+
}
410+
},
411+
{
412+
"type": "CompositeElement",
413+
"element_id": "3e7327ee201e84f3061474204708d8f7",
414+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
415+
"metadata": {
416+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
417+
"filetype": "application/json",
418+
"languages": [
419+
"eng"
420+
],
421+
"page_number": 3,
422+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
423+
}
424+
},
425+
{
426+
"type": "CompositeElement",
427+
"element_id": "3e7327ee201e84f3061474204708d8f7",
428+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
429+
"metadata": {
430+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
431+
"filetype": "application/json",
432+
"languages": [
433+
"eng"
434+
],
435+
"page_number": 3,
436+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
437+
}
438+
},
439+
{
440+
"type": "CompositeElement",
441+
"element_id": "3e7327ee201e84f3061474204708d8f7",
442+
"text": "Arcane Horror\n\n\"Upon ascending to the second floor of the tower, we were greeted by a gruesome sight: a ragged collection of bones wearing the robes of one of the senior enchanters. I had known her for years, watched her raise countless apprentices, and now she was a mere puppet for some demon.\"\n\n6",
443+
"metadata": {
444+
"filename": "DA-1p-with-duplicate-pages.pdf.json",
445+
"filetype": "application/json",
446+
"languages": [
447+
"eng"
448+
],
449+
"page_number": 3,
450+
"orig_elements": "eJzNUrFu2zAQ/ZUD59i1JMKWsgXokCxBh3RKAuNEHiWiEkmQVF0jyL/3qCSAUXTJFkGD7r3H93THe3wRNNFMLh+tFtcgqnrXqaZtD1I2qqpbU0mJut7zY2quxRWImTJqzMj6F1E+jskvUVGpX5k3diKHc6nF95tNFTYnm8eNXsJkFWbaBBwobYM24l2dz2FVY3iTWO++vdMTumEpeuYfBblBPDNaHI5umXuKjDclNdOfXDxuokJHcOtj9LE4fJg/2DyRYOm/PbdVZypTy74ludv1ndwbSVztWp5FT/Jr9rwi8RM3dzmkJ/EzeAeYFDlt3QDZQx4JEinvNJjJ+wjerFj2J4pXcCJ+I8EQiTJp6M+AXCyU/MwH7TDma0YiDgOzyk8TqdJUsem9o8THMa5ZbBp9zwhTzHwEJXKWY8mpEV2mmLZwByNq+OX8ycFIEQzzZ7ZJ/D+Y1chJBY5oE3Hm4viKUwKeaZmMVcRC5IbYABJHnJBJmEsfYQmB8uq4dqBp9m77JC5X5h5j5Jv5TQ9lbv9ZnWp/kB3vjOq6inSrpD60TYO95Btoqu6Lrs7lJuwv+/3Buvs32evzXyI+VF4="
451+
}
452+
},
299453
{
300454
"type": "CompositeElement",
301455
"element_id": "3e7327ee201e84f3061474204708d8f7",

test/integration/embedders/conftest.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,15 @@ def do_POST(self):
4040
self.send_response(200)
4141
self.send_header("Content-type", "application/json")
4242
self.end_headers()
43+
request_body = json.loads(self.rfile.read(int(self.headers.get("Content-Length", 0))))
44+
input = request_body["input"]
4345
body = {
44-
"data": [{"object": "embedding", "embedding": [], "index": 0}],
46+
"data": [
47+
{"object": "embedding", "embedding": [], "index": i}
48+
for i, _ in enumerate([input] if isinstance(input, str) else input)
49+
],
4550
"object": "list",
46-
"model": "text-embedding-ada-002",
51+
"model": request_body["model"],
4752
"usage": {"prompt_tokens": 1, "total_tokens": 2},
4853
}
4954
self.wfile.write(json.dumps(body).encode("utf-8"))

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.2.0" # pragma: no cover
1+
__version__ = "1.2.1" # pragma: no cover

unstructured_ingest/embed/interfaces.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,14 @@ def embed_documents(self, elements: list[dict]) -> list[dict]:
6767
elements = elements.copy()
6868
elements_with_text = [e for e in elements if e.get("text")]
6969
texts = [e["text"] for e in elements_with_text]
70-
embeddings = []
70+
all_embeddings = []
7171
try:
7272
for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
73-
embeddings = self.embed_batch(client=client, batch=batch)
74-
embeddings.extend(embeddings)
73+
embeddings_batch = self.embed_batch(client=client, batch=batch)
74+
all_embeddings.extend(embeddings_batch)
7575
except Exception as e:
7676
raise self.wrap_error(e=e)
77-
for element, embedding in zip(elements_with_text, embeddings):
77+
for element, embedding in zip(elements_with_text, all_embeddings, strict=True):
7878
element[EMBEDDINGS_KEY] = embedding
7979
return elements
8080

@@ -123,14 +123,14 @@ async def embed_documents(self, elements: list[dict]) -> list[dict]:
123123
elements = elements.copy()
124124
elements_with_text = [e for e in elements if e.get("text")]
125125
texts = [e["text"] for e in elements_with_text]
126-
embeddings = []
126+
all_embeddings = []
127127
try:
128128
for batch in batch_generator(texts, batch_size=self.config.batch_size or len(texts)):
129-
embeddings = await self.embed_batch(client=client, batch=batch)
130-
embeddings.extend(embeddings)
129+
embeddings_batch = await self.embed_batch(client=client, batch=batch)
130+
all_embeddings.extend(embeddings_batch)
131131
except Exception as e:
132132
raise self.wrap_error(e=e)
133-
for element, embedding in zip(elements_with_text, embeddings):
133+
for element, embedding in zip(elements_with_text, all_embeddings, strict=True):
134134
element[EMBEDDINGS_KEY] = embedding
135135
return elements
136136

0 commit comments

Comments
 (0)