| 
33 | 33 |     },  | 
34 | 34 |     {  | 
35 | 35 |       "cell_type": "code",  | 
36 |  | -      "execution_count": 2,  | 
 | 36 | +      "execution_count": null,  | 
37 | 37 |       "metadata": {  | 
38 | 38 |         "colab": {  | 
39 | 39 |           "base_uri": "https://localhost:8080/"  | 
40 | 40 |         },  | 
41 | 41 |         "id": "UQezgPCG1vml",  | 
42 | 42 |         "outputId": "97b9bc03-da1b-439a-c37b-be6fdb58ab21"  | 
43 | 43 |       },  | 
44 |  | -      "outputs": [  | 
45 |  | -        {  | 
46 |  | -          "name": "stdout",  | 
47 |  | -          "output_type": "stream",  | 
48 |  | -          "text": [  | 
49 |  | -            "Cloning into 'temp_repo'...\n",  | 
50 |  | -            "remote: Enumerating objects: 138, done.\u001b[K\n",  | 
51 |  | -            "remote: Counting objects: 100% (138/138), done.\u001b[K\n",  | 
52 |  | -            "remote: Compressing objects: 100% (98/98), done.\u001b[K\n",  | 
53 |  | -            "remote: Total 138 (delta 68), reused 91 (delta 35), pack-reused 0\u001b[K\n",  | 
54 |  | -            "Receiving objects: 100% (138/138), 7.19 MiB | 4.45 MiB/s, done.\n",  | 
55 |  | -            "Resolving deltas: 100% (68/68), done.\n",  | 
56 |  | -            "mv: rename temp_repo/resources to ./resources: Directory not empty\n"  | 
57 |  | -          ]  | 
58 |  | -        }  | 
59 |  | -      ],  | 
 | 44 | +      "outputs": [],  | 
60 | 45 |       "source": [  | 
61 | 46 |         "# NBVAL_SKIP\n",  | 
62 | 47 |         "!git clone https://github.com/redis-developer/redis-ai-resources.git temp_repo\n",  | 
 | 
200 | 185 |           "name": "stdout",  | 
201 | 186 |           "output_type": "stream",  | 
202 | 187 |           "text": [  | 
203 |  | -            "Sample doc Doc ID: 67e07154-6ea0-4822-8957-ac1d212fc9ee\n",  | 
 | 188 | +            "Sample doc Doc ID: c013353e-dae7-4d17-befd-9e784c8acf79\n",  | 
204 | 189 |             "Text: UNITED STATES SECURITIES AND EXCHANGE COMMISSION Washington,\n",  | 
205 | 190 |             "D.C. 20549 FORM 10-K (Mark One) ☒ ANNUAL  REPORT PURSUANT T O SECTION\n",  | 
206 | 191 |             "13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year\n",  | 
 | 
245 | 230 |     },  | 
246 | 231 |     {  | 
247 | 232 |       "cell_type": "code",  | 
248 |  | -      "execution_count": 10,  | 
 | 233 | +      "execution_count": 4,  | 
249 | 234 |       "metadata": {},  | 
250 | 235 |       "outputs": [],  | 
251 | 236 |       "source": [  | 
252 | 237 |         "from llama_index.core import StorageContext\n",  | 
253 | 238 |         "\n",  | 
254 |  | -        "vector_store = RedisVectorStore(redis_url=REDIS_URL, index_name=\"llama\", overwrite=True)\n",  | 
 | 239 | +        "vector_store = RedisVectorStore(redis_url=REDIS_URL, overwrite=True)\n",  | 
255 | 240 |         "\n",  | 
256 | 241 |         "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",  | 
257 | 242 |         "\n",  | 
 | 
267 | 252 |     },  | 
268 | 253 |     {  | 
269 | 254 |       "cell_type": "code",  | 
270 |  | -      "execution_count": 11,  | 
 | 255 | +      "execution_count": 5,  | 
271 | 256 |       "metadata": {},  | 
272 | 257 |       "outputs": [],  | 
273 | 258 |       "source": [  | 
 | 
285 | 270 |     },  | 
286 | 271 |     {  | 
287 | 272 |       "cell_type": "code",  | 
288 |  | -      "execution_count": 12,  | 
 | 273 | +      "execution_count": 6,  | 
289 | 274 |       "metadata": {},  | 
290 | 275 |       "outputs": [  | 
291 | 276 |         {  | 
292 | 277 |           "name": "stdout",  | 
293 | 278 |           "output_type": "stream",  | 
294 | 279 |           "text": [  | 
295 |  | -            "Node ID: b561dd17-5545-4d3a-bc4f-18cb39c7c01e\n",  | 
 | 280 | +            "Node ID: d2e6cd9c-0716-49d8-8563-407a00d05445\n",  | 
296 | 281 |             "Text: Table of Contents FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS The\n",  | 
297 | 282 |             "following tables present NIKE Brand revenues disaggregated by\n",  | 
298 | 283 |             "reportable operating segment, distribution channel and major product\n",  | 
 | 
301 | 286 |             "fiscal 2022 on...\n",  | 
302 | 287 |             "Score:  0.900\n",  | 
303 | 288 |             "\n",  | 
304 |  | -            "Node ID: 0415f059-9258-426b-8b21-34b287b3c21b\n",  | 
 | 289 | +            "Node ID: 28542d3b-b345-4e9e-b675-f62361ec85d9\n",  | 
305 | 290 |             "Text: Table of Contents NORTH AMERICA (Dollars in millions) FISCAL\n",  | 
306 | 291 |             "2023FISCAL 2022 % CHANGE% CHANGE EXCLUDING CURRENCY CHANGESFISCAL 2021\n",  | 
307 | 292 |             "% CHANGE% CHANGE EXCLUDING CURRENCY CHANGES Revenues by: Footwear $\n",  | 
308 | 293 |             "14,897 $ 12,228 22 % 22 %$ 11,644 5 % 5 % Apparel 5,947 5,492 8 % 9 %\n",  | 
309 | 294 |             "5,028 9 % 9 % Equipment 764 633 21 % 21 % 507 25 % 25 % TOTAL REVENUES\n",  | 
310 | 295 |             "$ 21,6...\n",  | 
311 |  | -            "Score:  0.886\n",  | 
 | 296 | +            "Score:  0.885\n",  | 
312 | 297 |             "\n"  | 
313 | 298 |           ]  | 
314 | 299 |         }  | 
 | 
329 | 314 |     },  | 
330 | 315 |     {  | 
331 | 316 |       "cell_type": "code",  | 
332 |  | -      "execution_count": 13,  | 
 | 317 | +      "execution_count": 7,  | 
333 | 318 |       "metadata": {},  | 
334 | 319 |       "outputs": [  | 
335 | 320 |         {  | 
 | 
338 | 323 |               "\"NIKE's revenue in fiscal 23 was $51.2 billion.\""  | 
339 | 324 |             ]  | 
340 | 325 |           },  | 
341 |  | -          "execution_count": 13,  | 
 | 326 | +          "execution_count": 7,  | 
342 | 327 |           "metadata": {},  | 
343 | 328 |           "output_type": "execute_result"  | 
344 | 329 |         }  | 
 | 
348 | 333 |         "response.response"  | 
349 | 334 |       ]  | 
350 | 335 |     },  | 
 | 336 | +    {  | 
 | 337 | +      "cell_type": "markdown",  | 
 | 338 | +      "metadata": {},  | 
 | 339 | +      "source": [  | 
 | 340 | +        "### Use a custom index schema\n",  | 
 | 341 | +        "\n",  | 
 | 342 | +        "In most use cases, you need the ability to customize the underlying index configuration\n",  | 
 | 343 | +        "and specification. For example, this is handy for defining the specific metadata filters you wish to enable.\n",  | 
 | 344 | +        "\n",  | 
 | 345 | +        "With Redis, this is as simple as defining an index schema object\n",  | 
 | 346 | +        "(from file or dict) and passing it through to the vector store client wrapper."  | 
 | 347 | +      ]  | 
 | 348 | +    },  | 
 | 349 | +    {  | 
 | 350 | +      "cell_type": "code",  | 
 | 351 | +      "execution_count": 8,  | 
 | 352 | +      "metadata": {},  | 
 | 353 | +      "outputs": [],  | 
 | 354 | +      "source": [  | 
 | 355 | +        "from redisvl.schema import IndexSchema\n",  | 
 | 356 | +        "\n",  | 
 | 357 | +        "\n",  | 
 | 358 | +        "custom_schema = IndexSchema.from_dict(\n",  | 
 | 359 | +        "    {\n",  | 
 | 360 | +        "        # customize basic index specs\n",  | 
 | 361 | +        "        \"index\": {\n",  | 
 | 362 | +        "            \"name\": \"custom_index\",\n",  | 
 | 363 | +        "            \"prefix\": \"docs\",\n",  | 
 | 364 | +        "            \"key_separator\": \":\",\n",  | 
 | 365 | +        "        },\n",  | 
 | 366 | +        "        # customize fields that are indexed\n",  | 
 | 367 | +        "        \"fields\": [\n",  | 
 | 368 | +        "            # required fields for llamaindex\n",  | 
 | 369 | +        "            {\"type\": \"tag\", \"name\": \"id\"},\n",  | 
 | 370 | +        "            {\"type\": \"tag\", \"name\": \"doc_id\"},\n",  | 
 | 371 | +        "            {\"type\": \"text\", \"name\": \"text\"},\n",  | 
 | 372 | +        "            # custom metadata fields\n",  | 
 | 373 | +        "            {\"type\": \"numeric\", \"name\": \"updated_at\"},\n",  | 
 | 374 | +        "            {\"type\": \"tag\", \"name\": \"file_name\"},\n",  | 
 | 375 | +        "            # custom vector field definition; dims must match the embedding model's output size (1536 here)\n",  | 
 | 376 | +        "            {\n",  | 
 | 377 | +        "                \"type\": \"vector\",\n",  | 
 | 378 | +        "                \"name\": \"vector\",\n",  | 
 | 379 | +        "                \"attrs\": {\n",  | 
 | 380 | +        "                    \"dims\": 1536,\n",  | 
 | 381 | +        "                    \"algorithm\": \"hnsw\",\n",  | 
 | 382 | +        "                    \"distance_metric\": \"cosine\",\n",  | 
 | 383 | +        "                },\n",  | 
 | 384 | +        "            },\n",  | 
 | 385 | +        "        ],\n",  | 
 | 386 | +        "    }\n",  | 
 | 387 | +        ")"  | 
 | 388 | +      ]  | 
 | 389 | +    },  | 
 | 390 | +    {  | 
 | 391 | +      "cell_type": "code",  | 
 | 392 | +      "execution_count": 9,  | 
 | 393 | +      "metadata": {},  | 
 | 394 | +      "outputs": [  | 
 | 395 | +        {  | 
 | 396 | +          "data": {  | 
 | 397 | +            "text/plain": [  | 
 | 398 | +              "IndexInfo(name='custom_index', prefix='docs', key_separator=':', storage_type=<StorageType.HASH: 'hash'>)"  | 
 | 399 | +            ]  | 
 | 400 | +          },  | 
 | 401 | +          "execution_count": 9,  | 
 | 402 | +          "metadata": {},  | 
 | 403 | +          "output_type": "execute_result"  | 
 | 404 | +        }  | 
 | 405 | +      ],  | 
 | 406 | +      "source": [  | 
 | 407 | +        "custom_schema.index"  | 
 | 408 | +      ]  | 
 | 409 | +    },  | 
 | 410 | +    {  | 
 | 411 | +      "cell_type": "code",  | 
 | 412 | +      "execution_count": 10,  | 
 | 413 | +      "metadata": {},  | 
 | 414 | +      "outputs": [  | 
 | 415 | +        {  | 
 | 416 | +          "data": {  | 
 | 417 | +            "text/plain": [  | 
 | 418 | +              "{'id': TagField(name='id', type='tag', path=None, attrs=TagFieldAttributes(sortable=False, separator=',', case_sensitive=False, withsuffixtrie=False)),\n",  | 
 | 419 | +              " 'doc_id': TagField(name='doc_id', type='tag', path=None, attrs=TagFieldAttributes(sortable=False, separator=',', case_sensitive=False, withsuffixtrie=False)),\n",  | 
 | 420 | +              " 'text': TextField(name='text', type='text', path=None, attrs=TextFieldAttributes(sortable=False, weight=1, no_stem=False, withsuffixtrie=False, phonetic_matcher=None)),\n",  | 
 | 421 | +              " 'updated_at': NumericField(name='updated_at', type='numeric', path=None, attrs=NumericFieldAttributes(sortable=False)),\n",  | 
 | 422 | +              " 'file_name': TagField(name='file_name', type='tag', path=None, attrs=TagFieldAttributes(sortable=False, separator=',', case_sensitive=False, withsuffixtrie=False)),\n",  | 
 | 423 | +              " 'vector': HNSWVectorField(name='vector', type='vector', path=None, attrs=HNSWVectorFieldAttributes(dims=1536, algorithm=<VectorIndexAlgorithm.HNSW: 'HNSW'>, datatype=<VectorDataType.FLOAT32: 'FLOAT32'>, distance_metric=<VectorDistanceMetric.COSINE: 'COSINE'>, initial_cap=None, m=16, ef_construction=200, ef_runtime=10, epsilon=0.01))}"  | 
 | 424 | +            ]  | 
 | 425 | +          },  | 
 | 426 | +          "execution_count": 10,  | 
 | 427 | +          "metadata": {},  | 
 | 428 | +          "output_type": "execute_result"  | 
 | 429 | +        }  | 
 | 430 | +      ],  | 
 | 431 | +      "source": [  | 
 | 432 | +        "custom_schema.fields"  | 
 | 433 | +      ]  | 
 | 434 | +    },  | 
 | 435 | +    {  | 
 | 436 | +      "cell_type": "code",  | 
 | 437 | +      "execution_count": 11,  | 
 | 438 | +      "metadata": {},  | 
 | 439 | +      "outputs": [],  | 
 | 440 | +      "source": [  | 
 | 441 | +        "# from datetime import datetime\n",  | 
 | 442 | +        "\n",  | 
 | 443 | +        "\n",  | 
 | 444 | +        "# def date_to_timestamp(date_string: str) -> int:\n",  | 
 | 445 | +        "#     date_format: str = \"%Y-%m-%d\"\n",  | 
 | 446 | +        "#     return int(datetime.strptime(date_string, date_format).timestamp())\n",  | 
 | 447 | +        "\n",  | 
 | 448 | +        "\n",  | 
 | 449 | +        "# # iterate through documents and add new field\n",  | 
 | 450 | +        "# for document in docs:\n",  | 
 | 451 | +        "#     document.metadata[\"updated_at\"] = date_to_timestamp(\n",  | 
 | 452 | +        "#         document.metadata[\"last_modified_date\"]\n",  | 
 | 453 | +        "#     )"  | 
 | 454 | +      ]  | 
 | 455 | +    },  | 
 | 456 | +    {  | 
 | 457 | +      "cell_type": "code",  | 
 | 458 | +      "execution_count": 12,  | 
 | 459 | +      "metadata": {},  | 
 | 460 | +      "outputs": [],  | 
 | 461 | +      "source": [  | 
 | 462 | +        "vector_store = RedisVectorStore(\n",  | 
 | 463 | +        "    schema=custom_schema,  # provide customized schema\n",  | 
 | 464 | +        "    redis_url=REDIS_URL,\n",  | 
 | 465 | +        "    overwrite=True,\n",  | 
 | 466 | +        ")\n",  | 
 | 467 | +        "\n",  | 
 | 468 | +        "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",  | 
 | 469 | +        "\n",  | 
 | 470 | +        "# build and load index from documents and storage context\n",  | 
 | 471 | +        "index = VectorStoreIndex.from_documents(\n",  | 
 | 472 | +        "    docs, storage_context=storage_context\n",  | 
 | 473 | +        ")"  | 
 | 474 | +      ]  | 
 | 475 | +    },  | 
 | 476 | +    {  | 
 | 477 | +      "cell_type": "markdown",  | 
 | 478 | +      "metadata": {},  | 
 | 479 | +      "source": [  | 
 | 480 | +        "### Query the vector store and filter on metadata\n",  | 
 | 481 | +        "Now that we have additional metadata indexed in Redis, let's try some queries that add filters. As an example, we'll search for chunks containing the word \"audit\", restricted to the file \"amzn-10k-2023.pdf\"."  | 
 | 482 | +      ]  | 
 | 483 | +    },  | 
 | 484 | +    {  | 
 | 485 | +      "cell_type": "code",  | 
 | 486 | +      "execution_count": 13,  | 
 | 487 | +      "metadata": {},  | 
 | 488 | +      "outputs": [],  | 
 | 489 | +      "source": [  | 
 | 490 | +        "from llama_index.core.vector_stores import (\n",  | 
 | 491 | +        "    MetadataFilters,\n",  | 
 | 492 | +        "    MetadataFilter,\n",  | 
 | 493 | +        "    ExactMatchFilter,\n",  | 
 | 494 | +        ")\n",  | 
 | 495 | +        "\n",  | 
 | 496 | +        "retriever = index.as_retriever(\n",  | 
 | 497 | +        "    similarity_top_k=3,\n",  | 
 | 498 | +        "    filters=MetadataFilters(\n",  | 
 | 499 | +        "        filters=[\n",  | 
 | 500 | +        "            ExactMatchFilter(key=\"file_name\", value=\"amzn-10k-2023.pdf\"),\n",  | 
 | 501 | +        "            MetadataFilter(\n",  | 
 | 502 | +        "                key=\"text\",\n",  | 
 | 503 | +        "                value=\"audit\",\n",  | 
 | 504 | +        "                operator=\"text_match\",\n",  | 
 | 505 | +        "            ),\n",  | 
 | 506 | +        "        ],\n",  | 
 | 507 | +        "        condition=\"and\",\n",  | 
 | 508 | +        "    ),\n",  | 
 | 509 | +        ")"  | 
 | 510 | +      ]  | 
 | 511 | +    },  | 
 | 512 | +    {  | 
 | 513 | +      "cell_type": "code",  | 
 | 514 | +      "execution_count": 14,  | 
 | 515 | +      "metadata": {},  | 
 | 516 | +      "outputs": [  | 
 | 517 | +        {  | 
 | 518 | +          "name": "stdout",  | 
 | 519 | +          "output_type": "stream",  | 
 | 520 | +          "text": [  | 
 | 521 | +            "Node ID: cd0c5d8f-e3b1-4cbb-aa6a-5960003cdb2d\n",  | 
 | 522 | +            "Text: Table of Contents valuation. In the ordinary course of our\n",  | 
 | 523 | +            "business, there are many transactions and calculations for which the\n",  | 
 | 524 | +            "ultimate tax determination is uncertain. Significant judgment is\n",  | 
 | 525 | +            "required in evaluating and estimating our tax expense, assets, and\n",  | 
 | 526 | +            "liabilities. We are also subject to tax controversies in various\n",  | 
 | 527 | +            "jurisdictions that can...\n",  | 
 | 528 | +            "Score:  0.746\n",  | 
 | 529 | +            "\n",  | 
 | 530 | +            "Node ID: 6745f668-4c7a-43bf-a9c3-9b04e1a497f8\n",  | 
 | 531 | +            "Text: Table of Contents Included in other income (expense), net in\n",  | 
 | 532 | +            "2021 and 2022 is a marketable equity securities valuation gain (loss)\n",  | 
 | 533 | +            "of $11.8 billion and $(12.7) billion from our equity investment in\n",  | 
 | 534 | +            "Rivian Automotive, Inc. (“Rivian”). Our investment in Rivian’s\n",  | 
 | 535 | +            "preferred stock was accounted for at cost, with adjustments for\n",  | 
 | 536 | +            "observable changes in ...\n",  | 
 | 537 | +            "Score:  0.740\n",  | 
 | 538 | +            "\n",  | 
 | 539 | +            "Node ID: 717666fe-fea5-488b-999c-84e6d8b9a0db\n",  | 
 | 540 | +            "Text: Exhibit 31.1 CERTIFICATIONS I, Andrew R. Jassy, certify that: 1.\n",  | 
 | 541 | +            "I have reviewed this Form 10-K of Amazon.com, Inc.; 2. Based on my\n",  | 
 | 542 | +            "knowledge, this report does not contain any untrue statement of a\n",  | 
 | 543 | +            "material fact or omit to state a material fact necessary to make the\n",  | 
 | 544 | +            "statements made, in light of the circumstances under which such\n",  | 
 | 545 | +            "statements were ...\n",  | 
 | 546 | +            "Score:  0.732\n",  | 
 | 547 | +            "\n"  | 
 | 548 | +          ]  | 
 | 549 | +        }  | 
 | 550 | +      ],  | 
 | 551 | +      "source": [  | 
 | 552 | +        "result_nodes = retriever.retrieve(\"What did the author learn?\")\n",  | 
 | 553 | +        "\n",  | 
 | 554 | +        "for node in result_nodes:\n",  | 
 | 555 | +        "    print(node)"  | 
 | 556 | +      ]  | 
 | 557 | +    },  | 
351 | 558 |     {  | 
352 | 559 |       "cell_type": "code",  | 
353 | 560 |       "execution_count": null,  | 
 | 
376 | 583 |       "name": "python",  | 
377 | 584 |       "nbconvert_exporter": "python",  | 
378 | 585 |       "pygments_lexer": "ipython3",  | 
379 |  | -      "version": "3.9.12"  | 
 | 586 | +      "version": "3.11.9"  | 
380 | 587 |     },  | 
381 | 588 |     "widgets": {  | 
382 | 589 |       "application/vnd.jupyter.widget-state+json": {  | 
 | 
0 commit comments