|
16 | 16 | }, |
17 | 17 | { |
18 | 18 | "cell_type": "code", |
19 | | - "execution_count": 2, |
| 19 | + "execution_count": 1, |
20 | 20 | "id": "be6ffdc5-fbaa-40b5-8b33-5540a3f957ba", |
21 | 21 | "metadata": {}, |
22 | 22 | "outputs": [], |
23 | 23 | "source": [ |
24 | 24 | "import numpy as np\n", |
25 | 25 | "\n", |
26 | 26 | "\n", |
27 | | - "def to_bit_vectors(embedding: list) -> list:\n", |
28 | | - " embeddings = []\n", |
29 | | - " for idx, patch_embedding in enumerate(embedding):\n", |
30 | | - " patch_embedding = np.array(patch_embedding)\n", |
31 | | - " binary_vector = (\n", |
32 | | - " np.packbits(np.where(patch_embedding > 0, 1, 0))\n", |
33 | | - " .astype(np.int8)\n", |
34 | | - " .tobytes()\n", |
35 | | - " .hex()\n", |
36 | | - " )\n", |
37 | | - " embeddings.append(binary_vector)\n", |
38 | | - " return embeddings" |
| 27 | + "def to_bit_vectors(embeddings: list) -> list:\n", |
| 28 | + " return [\n", |
| 29 | + " np.packbits(np.where(np.array(embedding) > 0, 1, 0))\n", |
| 30 | + " .astype(np.int8)\n", |
| 31 | + " .tobytes()\n", |
| 32 | + " .hex()\n", |
| 33 | + " for embedding in embeddings\n", |
| 34 | + " ]" |
39 | 35 | ] |
40 | 36 | }, |
41 | 37 | { |
|
49 | 45 | }, |
50 | 46 | { |
51 | 47 | "cell_type": "code", |
52 | | - "execution_count": 3, |
| 48 | + "execution_count": 2, |
53 | 49 | "id": "9871c9c5-c923-4deb-9f5b-aa6796ba0bbf", |
54 | 50 | "metadata": {}, |
55 | 51 | "outputs": [], |
|
70 | 66 | }, |
71 | 67 | { |
72 | 68 | "cell_type": "code", |
73 | | - "execution_count": 1, |
| 69 | + "execution_count": 3, |
74 | 70 | "id": "2de5872d-b372-40fe-85c5-111b9f9fa6c8", |
75 | 71 | "metadata": {}, |
76 | 72 | "outputs": [ |
77 | 73 | { |
78 | 74 | "name": "stdout", |
79 | 75 | "output_type": "stream", |
80 | 76 | "text": [ |
81 | | - "[INFO] Index 'searchlabs-colpali-token-pooling' already exists.\n" |
| 77 | + "[INFO] Creating index: searchlabs-colpali-token-pooling\n" |
82 | 78 | ] |
83 | 79 | } |
84 | 80 | ], |
|
126 | 122 | }, |
127 | 123 | { |
128 | 124 | "cell_type": "code", |
129 | | - "execution_count": 5, |
| 125 | + "execution_count": null, |
130 | 126 | "id": "bdf6ff33-3e22-43c1-9f3e-c3dd663b40e2", |
131 | 127 | "metadata": {}, |
132 | 128 | "outputs": [ |
133 | 129 | { |
134 | 130 | "data": { |
135 | 131 | "application/vnd.jupyter.widget-view+json": { |
136 | | - "model_id": "cef0c48b9b5d4b3982fbdb4773494ec8", |
| 132 | + "model_id": "047c33b3344f49328bda552b123c168d", |
137 | 133 | "version_major": 2, |
138 | 134 | "version_minor": 0 |
139 | 135 | }, |
|
143 | 139 | }, |
144 | 140 | "metadata": {}, |
145 | 141 | "output_type": "display_data" |
146 | | - }, |
147 | | - { |
148 | | - "name": "stdout", |
149 | | - "output_type": "stream", |
150 | | - "text": [ |
151 | | - "Completed indexing 500 documents\n" |
152 | | - ] |
153 | 142 | } |
154 | 143 | ], |
155 | 144 | "source": [ |
|
193 | 182 | }, |
194 | 183 | { |
195 | 184 | "cell_type": "code", |
196 | | - "execution_count": 6, |
| 185 | + "execution_count": null, |
197 | 186 | "id": "1dfc3713-d649-46db-aa81-171d6d92668e", |
198 | 187 | "metadata": {}, |
199 | | - "outputs": [ |
200 | | - { |
201 | | - "data": { |
202 | | - "application/vnd.jupyter.widget-view+json": { |
203 | | - "model_id": "d4361ebd1e59483aa8060a4fbe71715b", |
204 | | - "version_major": 2, |
205 | | - "version_minor": 0 |
206 | | - }, |
207 | | - "text/plain": [ |
208 | | - "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]" |
209 | | - ] |
210 | | - }, |
211 | | - "metadata": {}, |
212 | | - "output_type": "display_data" |
213 | | - } |
214 | | - ], |
| 188 | + "outputs": [], |
215 | 189 | "source": [ |
216 | 190 | "import torch\n", |
217 | 191 | "from PIL import Image\n", |
|
235 | 209 | }, |
236 | 210 | { |
237 | 211 | "cell_type": "code", |
238 | | - "execution_count": 7, |
| 212 | + "execution_count": null, |
239 | 213 | "id": "8e322b23-b4bc-409d-9e00-2dab93f6a295", |
240 | 214 | "metadata": {}, |
241 | | - "outputs": [ |
242 | | - { |
243 | | - "data": { |
244 | | - "text/html": [ |
245 | | - "<div style='display: flex; flex-wrap: wrap; align-items: flex-start;'><img src=\"searchlabs-colpali/image_3.jpg\" alt=\"image_3.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_104.jpg\" alt=\"image_104.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_2.jpg\" alt=\"image_2.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_12.jpg\" alt=\"image_12.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"><img src=\"searchlabs-colpali/image_120.jpg\" alt=\"image_120.jpg\" style=\"max-width:300px; height:auto; margin:10px;\"></div>" |
246 | | - ], |
247 | | - "text/plain": [ |
248 | | - "<IPython.core.display.HTML object>" |
249 | | - ] |
250 | | - }, |
251 | | - "metadata": {}, |
252 | | - "output_type": "display_data" |
253 | | - } |
254 | | - ], |
| 215 | + "outputs": [], |
255 | 216 | "source": [ |
256 | 217 | "from IPython.display import display, HTML\n", |
257 | 218 | "import os\n", |
|
0 commit comments