diff --git a/python-recipes/RAG/01_redisvl.ipynb b/python-recipes/RAG/01_redisvl.ipynb index 5b8a87ac..19f0cc86 100644 --- a/python-recipes/RAG/01_redisvl.ipynb +++ b/python-recipes/RAG/01_redisvl.ipynb @@ -103,7 +103,7 @@ } ], "source": [ - "%pip install -q redis redisvl langchain-community pypdf sentence-transformers langchain openai pandas" + "%pip install -q redis \"redisvl>=0.4.1\" langchain-community pypdf sentence-transformers langchain openai pandas" ] }, { @@ -186,15 +186,13 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": { "id": "ggh5TzhkJkD9" }, "outputs": [], "source": [ "import os\n", - "import warnings\n", - "#warnings.filterwarnings('ignore')\n", "\n", "# Replace values below with your own if using Redis Cloud instance\n", "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") # ex: \"redis-18374.c253.us-central1-1.gce.cloud.redislabs.com\"\n", @@ -244,7 +242,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Listing available documents ... ['resources/jnj-10k-2023.pdf', 'resources/retrieval_basic_rag_test.csv', 'resources/aapl-10k-2023.pdf', 'resources/nke-10k-2023.pdf', 'resources/amzn-10k-2023.pdf', 'resources/testset_15.csv', 'resources/generation_basic_rag_test.csv', 'resources/testset.csv', 'resources/msft-10k-2023.pdf', 'resources/propositions.json', 'resources/nvd-10k-2023.pdf']\n" + "Listing available documents ... 
['resources/nke-10k-2023.pdf', 'resources/amzn-10k-2023.pdf', 'resources/jnj-10k-2023.pdf', 'resources/aapl-10k-2023.pdf', 'resources/testset_15.csv', 'resources/retrieval_basic_rag_test.csv', 'resources/2022-chevy-colorado-ebrochure.pdf', 'resources/nvd-10k-2023.pdf', 'resources/testset.csv', 'resources/msft-10k-2023.pdf', 'resources/propositions.json', 'resources/generation_basic_rag_test.csv']\n" ] } ], @@ -461,217 +459,6 @@ "outputId": "b0f0d2c1-41dc-4932-990b-53d2912af19e" }, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "20:48:16 numexpr.utils INFO NumExpr defaulting to 2 threads.\n", - "20:48:30 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: cuda\n", - "20:48:30 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cbd44245af844dca8e568691cc1c15c5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "modules.json: 0%| | 0.00/349 [00:00[KNN 3 @text_embedding $vector AS vector_distance] RETURN 3 chunk_id content vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3'" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -954,7 +723,7 @@ }, { 
"cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -966,14 +735,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"pd\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"chunk:88\",\n \"chunk:80\",\n \"chunk:87\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector_distance\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"0.337694585323\",\n \"0.342052936554\",\n \"0.35776078701\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chunk_id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"88\",\n \"80\",\n \"87\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Asia Pacific & Latin America 1,932 1,896 2 % 1,530 24 %\\nGlobal Brand Divisions (4,841) (4,262) -14 % (3,656) -17 %\\nTOTAL NIKE BRAND $ 8,359 $ 8,406 -1 % $ 8,641 -3 %\\nConverse 676 669 1 % 543 23 %\\nCorporate (2,840) (2,219) -28 % (2,261) 2 %\\nTOTAL NIKE, INC. EARNINGS BEFORE INTEREST ANDTAXES $ 6,195 $ 6,856 -10 % $ 6,923 -1 %\\nEBIT margin 12.1 % 14.7 % 15.5 %\\nInterest expense (income), net (6) 205 \\u2014 262 \\u2014 \\nTOTAL NIKE, INC. INCOME BEFORE INCOME TAXES $ 6,201 $ 6,651 -7 % $ 6,661 0 %\\n(1) Total NIKE Brand EBIT, Total NIKE, Inc. EBIT and EBIT Margin represent non-GAAP financial measures. 
See \\\"Use of Non-GAAP Financial Measures\\\" for further information.\\n(1) (1)\\n(2)\\n(3)\\n(4)\\n(1)\\n(1)\\n(1)\\n2023 FORM 10-K 36\",\n \"Table of Contents\\nCONSOLIDATED OPERATING RESULTS\\nREVENUES\\n(Dollars in millions) FISCAL2023 FISCAL2022 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES FISCAL2021 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES\\nNIKE, Inc. Revenues:\\nNIKE Brand Revenues by:\\nFootwear $ 33,135 $ 29,143 14 % 20 %$ 28,021 4 % 4 %\\nApparel 13,843 13,567 2 % 8 % 12,865 5 % 6 %\\nEquipment 1,727 1,624 6 % 13 % 1,382 18 % 18 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTotal NIKE Brand Revenues $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nConverse 2,427 2,346 3 % 8 % 2,205 6 % 7 %\\nCorporate 27 (72) \\u2014 \\u2014 40 \\u2014 \\u2014 \\nTOTAL NIKE, INC. REVENUES $ 51,217 $ 46,710 10 % 16 %$ 44,538 5 % 6 %\\nSupplemental NIKE Brand Revenues Details:\\nNIKE Brand Revenues by:\\nSales to Wholesale Customers $ 27,397 $ 25,608 7 % 14 %$ 25,898 -1 % -1 %\\nSales through NIKE Direct 21,308 18,726 14 % 20 % 16,370 14 % 15 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTOTAL NIKE BRAND REVENUES $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nNIKE Brand Revenues on a Wholesale Equivalent Basis :\\nSales to Wholesale Customers $ 27,397 $ 25,608 7 % 14 %$ 25,898 -1 % -1 %\\nSales from our Wholesale Operations to NIKE Direct Operations 12,730 10,543 21 % 27 % 9,872 7 % 7 %\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES $ 40,127 $ 36,151 11 % 18 %$ 35,770 1 % 1 %\\nNIKE Brand Wholesale Equivalent Revenues by:\\nMen's $ 20,733 $ 18,797 10 % 17 %$ 18,391 2 % 3 %\\nWomen's 8,606 8,273 4 % 11 % 8,225 1 % 1 %\\nNIKE Kids' 5,038 4,874 3 % 10 % 4,882 0 % 0 %\\nJordan Brand 6,589 5,122 29 % 35 % 4,780 7 % 7 %\\nOthers (839) (915) 8 % -3 % (508) -80 % -79 %\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES $ 40,127 $ 36,151 11 % 18 %$ 35,770 1 % 1 %\\n(1) The percent change excluding currency changes and the presentation of wholesale 
equivalent revenues represent non-GAAP financial measures. For further information, see \\\"Use of Non-GAAPFinancial Measures\\\".\\n(2) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n(3) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\nmanaged through our central foreign exchange risk management program.\",\n \"Table of Contents\\nOPERATING SEGMENTS\\nAs discussed in Note 15 \\u2014 Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments\\nare evidence of the structure of the Company's internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE\\nBrand sales activity.\\nThe breakdown of Revenues is as follows:\\n(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES FISCAL 2021 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES\\nNorth America $ 21,608 $ 18,353 18 % 18 %$ 17,179 7 % 7 %\\nEurope, Middle East & Africa 13,418 12,479 8 % 21 % 11,456 9 % 12 %\\nGreater China 7,248 7,547 -4 % 4 % 8,290 -9 % -13 %\\nAsia Pacific & Latin America 6,431 5,955 8 % 17 % 5,343 11 % 16 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTOTAL NIKE BRAND $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nConverse 2,427 2,346 3 % 8 % 2,205 6 % 7 %\\nCorporate 27 (72) \\u2014 \\u2014 40 \\u2014 \\u2014 \\nTOTAL NIKE, INC. REVENUES $ 51,217 $ 46,710 10 % 16 %$ 44,538 5 % 6 %\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. 
For further information, see \\\"Use of Non-GAAP Financial Measures\\\".\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 \\u2014 Acquisitions and Divestitures of the Notes to ConsolidatedFinancial Statements contained in Item 8 of this Annual Report.\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, butmanaged through our central foreign exchange risk management program.\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\\\"EBIT\\\"). As discussed in Note 15 \\u2014 Operating\\nSegments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\nThe breakdown of EBIT is as follows:\\n(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE FISCAL 2021 % CHANGE\\nNorth America $ 5,454 $ 5,114 7 % $ 5,089 0 %\\nEurope, Middle East & Africa 3,531 3,293 7 % 2,435 35 %\\nGreater China 2,283 2,365 -3 % 3,243 -27 %\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "" ], "text/plain": [ " id vector_distance chunk_id \\\n", - "0 chunk:88 0.337694585323 88 \n", - "1 chunk:80 0.342052936554 80 \n", - "2 chunk:87 0.35776078701 87 \n", + "0 chunk:87 0.334264576435 87 \n", + "1 chunk:79 0.350993335247 79 \n", + "2 chunk:86 0.371814072132 86 \n", "\n", " content \n", "0 Asia Pacific & Latin America 1,932 1,896 2 % 1... \n", - "1 Table of Contents\\nCONSOLIDATED OPERATING RESU... \n", - "2 Table of Contents\\nOPERATING SEGMENTS\\nAs disc... " + "1 Table of Contents\\nCONSOLIDA TED OPERA TING RE... \n", + "2 Table of Contents\\nOPERA TING SEGMENTS\\nAs dis... " ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1259,7 +813,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1272,9 +826,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "88 0.337694585323\n", - "80 0.342052936554\n", - "87 0.35776078701\n" + "87 0.334264576435\n", + "79 0.350993335247\n", + "86 0.371814072132\n" ] } ], @@ -1295,7 +849,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1307,14 +861,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "repr_error": "Out of range float values are not JSON compliant: nan", - "type": "dataframe" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "" ], "text/plain": [ " id payload vector_distance chunk_id\n", - "0 chunk:80 None 0.342052936554 80\n", - "1 chunk:83 None 0.37876611948 83\n", - "2 chunk:87 None 0.35776078701 87\n", - "3 chunk:88 None 0.337694585323 88" + "0 chunk:79 None 0.350993335247 79\n", + "1 chunk:82 None 0.378765702248 82\n", + "2 chunk:86 None 0.371814072132 86\n", + "3 chunk:87 None 0.334264576435 87" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1627,7 +966,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1639,14 +978,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"pd\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"chunk:129\",\n \"chunk:39\",\n \"chunk:83\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector_distance\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"0.41875731945\",\n \"0.683842301369\",\n \"0.37876611948\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Table of Contents\\nNIKE, INC.\\nCONSOLIDATED STATEMENTS OF INCOME\\nYEAR ENDED MAY 31,\\n(In millions, except per share data) 2023 2022 2021\\nRevenues $ 51,217 $ 46,710 $ 44,538 \\nCost of sales 28,925 25,231 24,576 \\nGross profit 22,292 21,479 19,962 \\nDemand creation expense 4,060 3,850 3,114 \\nOperating overhead expense 12,317 10,954 9,911 \\nTotal selling and administrative expense 16,377 14,804 13,025 \\nInterest expense (income), net (6) 205 262 \\nOther (income) expense, net (280) (181) 14 \\nIncome before income taxes 6,201 6,651 6,661 
\\nIncome tax expense 1,131 605 934 \\nNET INCOME $ 5,070 $ 6,046 $ 5,727 \\nEarnings per common share:\\nBasic $ 3.27 $ 3.83 $ 3.64 \\nDiluted $ 3.23 $ 3.75 $ 3.56 \\nWeighted average common shares outstanding:\\nBasic 1,551.6 1,578.8 1,573.0 \\nDiluted 1,569.8 1,610.8 1,609.4 \\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n2023 FORM 10-K 55\",\n \"manner. However, lead times for many of our products may make it more difficult for us to respond rapidly to new or changing product trends or consumer preferences. All\\nof our products are subject to changing consumer preferences that cannot be predicted with certainty. Our new products may not receive consumer acceptance as\\nconsumer preferences could shift rapidly to different types of performance products or away from these types of products altogether, and our future success depends in\\npart on our ability to anticipate and respond to these changes. If we fail to anticipate accurately and respond to trends and shifts in consumer preferences by adjusting the\\nmix of existing product offerings, developing new products, designs, styles and categories, and influencing sports and fitness preferences through extensive marketing, we\\ncould experience lower sales, excess inventories or lower profit margins, any of which could have an adverse effect on our results of operations and financial condition. In\\naddition, we market our products globally through a diverse spectrum of advertising and promotional programs and campaigns, including social media and other digital\\nadvertising networks. 
If we do not successfully market our products or if advertising and promotional costs increase, these factors could have an adverse effect on our\\nbusiness, financial condition and results of operations.\\nWe rely on technical innovation and high-quality products to compete in the market for our products.\\nTechnical innovation and quality control in the design and manufacturing processes of footwear, apparel, equipment and other products and services are essential to the\\ncommercial success of our products and development of new products. Research and development play a key role in technical innovation. We rely upon specialists in the\\nfields of biomechanics, chemistry, exercise physiology, engineering, digital technologies, industrial design, sustainability and related fields, as well as research committees\\nand advisory boards made up of athletes, coaches, trainers, equipment managers, orthopedists, podiatrists and other experts to develop and test cutting-edge\\nperformance products. While we strive to produce products that help to enhance athletic performance and reduce injury and maximize comfort, if we fail to introduce\\ntechnical innovation in our products, consumer demand for our products could decline, and if we experience problems with the quality of our products, we may incur\",\n \"Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 COMPARED TO FISCAL 2022\\nFor fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. 
Gross margin decreased 250 basis points to\\n43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:\\n*Wholesale equivalent\\nThe decrease in gross margin for fiscal 2023 was primarily due to:\\n\\u2022 Higher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as\\nproduct mix;\\n\\u2022 Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity in\\nthe prior period resulting from lower available inventory supply;\\n\\u2022 Unfavorable changes in net foreign currency exchange rates, including hedges; and\\n\\u2022 Lower off-price margin, on a wholesale equivalent basis.\\nThis was partially offset by:\\n\\u2022 Higher NIKE Brand full-price ASP, net of discounts, on a wholesale equivalent basis, due primarily to strategic pricing actions and product mix; and\\n\\u2022 Lower other costs, primarily due to higher inventory obsolescence reserves recognized in Greater China in the fourth quarter of fiscal 2022.\\nTOTAL SELLING AND ADMINISTRATIVE EXPENSE\\n(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE FISCAL 2021 % CHANGE\\nDemand creation expense $ 4,060 $ 3,850 5 % $ 3,114 24 %\\nOperating overhead expense 12,317 10,954 12 % 9,911 11 %\\nTotal selling and administrative expense $ 16,377 $ 14,804 11 % $ 13,025 14 %\\n% of revenues 32.0 % 31.7 % 30 bps 29.2 % 250 bps\\n(1) Demand creation expense consists of advertising and promotion costs, including costs of endorsement contracts, complimentary product, television, digital and print advertising and media costs, brandevents and retail brand presentation.\\nFISCAL 2023 COMPARED TO FISCAL 2022\\nDemand creation expense increased 5% for fiscal 2023, primarily due to higher advertising and marketing expense and higher sports marketing expense. 
Changes in\\nforeign currency exchange rates decreased Demand creation expense by approximately 4 percentage points.\\nOperating overhead expense increased 12%, primarily due to higher wage-related expenses, NIKE Direct variable costs, strategic technology enterprise investments and\\nother administrative costs. Changes in foreign currency exchange rates decreased Operating overhead expense by approximately 3 percentage points.\\n(1)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "" ], "text/plain": [ " id vector_distance \\\n", - "0 chunk:83 0.37876611948 \n", - "1 chunk:129 0.41875731945 \n", - "2 chunk:168 0.657553255558 \n", - "3 chunk:39 0.683842301369 \n", + "0 chunk:82 0.378765702248 \n", + "1 chunk:128 0.418757259846 \n", + "2 chunk:72 0.466709017754 \n", + "3 chunk:62 0.493393957615 \n", "\n", " content \n", "0 Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 C... \n", "1 Table of Contents\\nNIKE, INC.\\nCONSOLIDATED ST... \n", - "2 Table of Contents\\nNOTE 10 — EARNINGS PER SHAR... \n", - "3 manner. However, lead times for many of our pr... " + "2 Table of Contents\\nITEM 7. MANAGEM ENT'S DISCU... \n", + "3 existing businesses, such as our NIKE Direct o... " ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1957,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": { "id": "bCffoZRx1ujD" }, @@ -1977,7 +1101,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1989,14 +1113,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"pd\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"chunk:80\",\n \"chunk:83\",\n \"chunk:88\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector_distance\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"0.342052936554\",\n \"0.37876611948\",\n \"0.337694585323\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Table of Contents\\nCONSOLIDATED OPERATING RESULTS\\nREVENUES\\n(Dollars in millions) FISCAL2023 
FISCAL2022 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES FISCAL2021 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES\\nNIKE, Inc. Revenues:\\nNIKE Brand Revenues by:\\nFootwear $ 33,135 $ 29,143 14 % 20 %$ 28,021 4 % 4 %\\nApparel 13,843 13,567 2 % 8 % 12,865 5 % 6 %\\nEquipment 1,727 1,624 6 % 13 % 1,382 18 % 18 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTotal NIKE Brand Revenues $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nConverse 2,427 2,346 3 % 8 % 2,205 6 % 7 %\\nCorporate 27 (72) \\u2014 \\u2014 40 \\u2014 \\u2014 \\nTOTAL NIKE, INC. REVENUES $ 51,217 $ 46,710 10 % 16 %$ 44,538 5 % 6 %\\nSupplemental NIKE Brand Revenues Details:\\nNIKE Brand Revenues by:\\nSales to Wholesale Customers $ 27,397 $ 25,608 7 % 14 %$ 25,898 -1 % -1 %\\nSales through NIKE Direct 21,308 18,726 14 % 20 % 16,370 14 % 15 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTOTAL NIKE BRAND REVENUES $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nNIKE Brand Revenues on a Wholesale Equivalent Basis :\\nSales to Wholesale Customers $ 27,397 $ 25,608 7 % 14 %$ 25,898 -1 % -1 %\\nSales from our Wholesale Operations to NIKE Direct Operations 12,730 10,543 21 % 27 % 9,872 7 % 7 %\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES $ 40,127 $ 36,151 11 % 18 %$ 35,770 1 % 1 %\\nNIKE Brand Wholesale Equivalent Revenues by:\\nMen's $ 20,733 $ 18,797 10 % 17 %$ 18,391 2 % 3 %\\nWomen's 8,606 8,273 4 % 11 % 8,225 1 % 1 %\\nNIKE Kids' 5,038 4,874 3 % 10 % 4,882 0 % 0 %\\nJordan Brand 6,589 5,122 29 % 35 % 4,780 7 % 7 %\\nOthers (839) (915) 8 % -3 % (508) -80 % -79 %\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES $ 40,127 $ 36,151 11 % 18 %$ 35,770 1 % 1 %\\n(1) The percent change excluding currency changes and the presentation of wholesale equivalent revenues represent non-GAAP financial measures. 
For further information, see \\\"Use of Non-GAAPFinancial Measures\\\".\\n(2) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n(3) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\nmanaged through our central foreign exchange risk management program.\",\n \"Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 COMPARED TO FISCAL 2022\\nFor fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. Gross margin decreased 250 basis points to\\n43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:\\n*Wholesale equivalent\\nThe decrease in gross margin for fiscal 2023 was primarily due to:\\n\\u2022 Higher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as\\nproduct mix;\\n\\u2022 Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity in\\nthe prior period resulting from lower available inventory supply;\\n\\u2022 Unfavorable changes in net foreign currency exchange rates, including hedges; and\\n\\u2022 Lower off-price margin, on a wholesale equivalent basis.\\nThis was partially offset by:\\n\\u2022 Higher NIKE Brand full-price ASP, net of discounts, on a wholesale equivalent basis, due primarily to strategic pricing actions and product mix; and\\n\\u2022 Lower other costs, primarily due to higher inventory obsolescence reserves recognized in Greater China in the fourth quarter of fiscal 2022.\\nTOTAL SELLING AND ADMINISTRATIVE EXPENSE\\n(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE FISCAL 2021 % CHANGE\\nDemand creation expense $ 
4,060 $ 3,850 5 % $ 3,114 24 %\\nOperating overhead expense 12,317 10,954 12 % 9,911 11 %\\nTotal selling and administrative expense $ 16,377 $ 14,804 11 % $ 13,025 14 %\\n% of revenues 32.0 % 31.7 % 30 bps 29.2 % 250 bps\\n(1) Demand creation expense consists of advertising and promotion costs, including costs of endorsement contracts, complimentary product, television, digital and print advertising and media costs, brandevents and retail brand presentation.\\nFISCAL 2023 COMPARED TO FISCAL 2022\\nDemand creation expense increased 5% for fiscal 2023, primarily due to higher advertising and marketing expense and higher sports marketing expense. Changes in\\nforeign currency exchange rates decreased Demand creation expense by approximately 4 percentage points.\\nOperating overhead expense increased 12%, primarily due to higher wage-related expenses, NIKE Direct variable costs, strategic technology enterprise investments and\\nother administrative costs. Changes in foreign currency exchange rates decreased Operating overhead expense by approximately 3 percentage points.\\n(1)\",\n \"Asia Pacific & Latin America 1,932 1,896 2 % 1,530 24 %\\nGlobal Brand Divisions (4,841) (4,262) -14 % (3,656) -17 %\\nTOTAL NIKE BRAND $ 8,359 $ 8,406 -1 % $ 8,641 -3 %\\nConverse 676 669 1 % 543 23 %\\nCorporate (2,840) (2,219) -28 % (2,261) 2 %\\nTOTAL NIKE, INC. EARNINGS BEFORE INTEREST ANDTAXES $ 6,195 $ 6,856 -10 % $ 6,923 -1 %\\nEBIT margin 12.1 % 14.7 % 15.5 %\\nInterest expense (income), net (6) 205 \\u2014 262 \\u2014 \\nTOTAL NIKE, INC. INCOME BEFORE INCOME TAXES $ 6,201 $ 6,651 -7 % $ 6,661 0 %\\n(1) Total NIKE Brand EBIT, Total NIKE, Inc. EBIT and EBIT Margin represent non-GAAP financial measures. See \\\"Use of Non-GAAP Financial Measures\\\" for further information.\\n(1) (1)\\n(2)\\n(3)\\n(4)\\n(1)\\n(1)\\n(1)\\n2023 FORM 10-K 36\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "" ], "text/plain": [ " id vector_distance content\n", - "0 chunk:88 0.337694585323 Asia Pacific & Latin America 1,932 1,896 2 % 1...\n", - "1 chunk:80 0.342052936554 Table of Contents\\nCONSOLIDATED OPERATING RESU...\n", - "2 chunk:87 0.35776078701 Table of Contents\\nOPERATING SEGMENTS\\nAs disc...\n", - "3 chunk:83 0.37876611948 Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 C..." + "0 chunk:87 0.334264576435 Asia Pacific & Latin America 1,932 1,896 2 % 1...\n", + "1 chunk:79 0.350993335247 Table of Contents\\nCONSOLIDA TED OPERA TING RE...\n", + "2 chunk:86 0.371814072132 Table of Contents\\nOPERA TING SEGMENTS\\nAs dis...\n", + "3 chunk:82 0.378765702248 Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 C..." ] }, - "execution_count": 19, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -2277,7 +1186,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2289,14 +1198,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"pd\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"chunk:80\",\n \"chunk:83\",\n \"chunk:88\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector_distance\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"0.342052936554\",\n \"0.37876611948\",\n \"0.337694585323\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Table of Contents\\nCONSOLIDATED OPERATING RESULTS\\nREVENUES\\n(Dollars in millions) FISCAL2023 FISCAL2022 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES FISCAL2021 % CHANGE\\n% CHANGEEXCLUDINGCURRENCYCHANGES\\nNIKE, Inc. 
Revenues:\\nNIKE Brand Revenues by:\\nFootwear $ 33,135 $ 29,143 14 % 20 %$ 28,021 4 % 4 %\\nApparel 13,843 13,567 2 % 8 % 12,865 5 % 6 %\\nEquipment 1,727 1,624 6 % 13 % 1,382 18 % 18 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTotal NIKE Brand Revenues $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nConverse 2,427 2,346 3 % 8 % 2,205 6 % 7 %\\nCorporate 27 (72) \\u2014 \\u2014 40 \\u2014 \\u2014 \\nTOTAL NIKE, INC. REVENUES $ 51,217 $ 46,710 10 % 16 %$ 44,538 5 % 6 %\\nSupplemental NIKE Brand Revenues Details:\\nNIKE Brand Revenues by:\\nSales to Wholesale Customers $ 27,397 $ 25,608 7 % 14 %$ 25,898 -1 % -1 %\\nSales through NIKE Direct 21,308 18,726 14 % 20 % 16,370 14 % 15 %\\nGlobal Brand Divisions 58 102 -43 % -43 % 25 308 % 302 %\\nTOTAL NIKE BRAND REVENUES $ 48,763 $ 44,436 10 % 16 %$ 42,293 5 % 6 %\\nNIKE Brand Revenues on a Wholesale Equivalent Basis :\\nSales to Wholesale Customers $ 27,397 $ 25,608 7 % 14 %$ 25,898 -1 % -1 %\\nSales from our Wholesale Operations to NIKE Direct Operations 12,730 10,543 21 % 27 % 9,872 7 % 7 %\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES $ 40,127 $ 36,151 11 % 18 %$ 35,770 1 % 1 %\\nNIKE Brand Wholesale Equivalent Revenues by:\\nMen's $ 20,733 $ 18,797 10 % 17 %$ 18,391 2 % 3 %\\nWomen's 8,606 8,273 4 % 11 % 8,225 1 % 1 %\\nNIKE Kids' 5,038 4,874 3 % 10 % 4,882 0 % 0 %\\nJordan Brand 6,589 5,122 29 % 35 % 4,780 7 % 7 %\\nOthers (839) (915) 8 % -3 % (508) -80 % -79 %\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES $ 40,127 $ 36,151 11 % 18 %$ 35,770 1 % 1 %\\n(1) The percent change excluding currency changes and the presentation of wholesale equivalent revenues represent non-GAAP financial measures. 
For further information, see \\\"Use of Non-GAAPFinancial Measures\\\".\\n(2) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n(3) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\nmanaged through our central foreign exchange risk management program.\",\n \"Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 COMPARED TO FISCAL 2022\\nFor fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. Gross margin decreased 250 basis points to\\n43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:\\n*Wholesale equivalent\\nThe decrease in gross margin for fiscal 2023 was primarily due to:\\n\\u2022 Higher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as\\nproduct mix;\\n\\u2022 Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity in\\nthe prior period resulting from lower available inventory supply;\\n\\u2022 Unfavorable changes in net foreign currency exchange rates, including hedges; and\\n\\u2022 Lower off-price margin, on a wholesale equivalent basis.\\nThis was partially offset by:\\n\\u2022 Higher NIKE Brand full-price ASP, net of discounts, on a wholesale equivalent basis, due primarily to strategic pricing actions and product mix; and\\n\\u2022 Lower other costs, primarily due to higher inventory obsolescence reserves recognized in Greater China in the fourth quarter of fiscal 2022.\\nTOTAL SELLING AND ADMINISTRATIVE EXPENSE\\n(Dollars in millions) FISCAL 2023 FISCAL 2022 % CHANGE FISCAL 2021 % CHANGE\\nDemand creation expense $ 
4,060 $ 3,850 5 % $ 3,114 24 %\\nOperating overhead expense 12,317 10,954 12 % 9,911 11 %\\nTotal selling and administrative expense $ 16,377 $ 14,804 11 % $ 13,025 14 %\\n% of revenues 32.0 % 31.7 % 30 bps 29.2 % 250 bps\\n(1) Demand creation expense consists of advertising and promotion costs, including costs of endorsement contracts, complimentary product, television, digital and print advertising and media costs, brandevents and retail brand presentation.\\nFISCAL 2023 COMPARED TO FISCAL 2022\\nDemand creation expense increased 5% for fiscal 2023, primarily due to higher advertising and marketing expense and higher sports marketing expense. Changes in\\nforeign currency exchange rates decreased Demand creation expense by approximately 4 percentage points.\\nOperating overhead expense increased 12%, primarily due to higher wage-related expenses, NIKE Direct variable costs, strategic technology enterprise investments and\\nother administrative costs. Changes in foreign currency exchange rates decreased Operating overhead expense by approximately 3 percentage points.\\n(1)\",\n \"Asia Pacific & Latin America 1,932 1,896 2 % 1,530 24 %\\nGlobal Brand Divisions (4,841) (4,262) -14 % (3,656) -17 %\\nTOTAL NIKE BRAND $ 8,359 $ 8,406 -1 % $ 8,641 -3 %\\nConverse 676 669 1 % 543 23 %\\nCorporate (2,840) (2,219) -28 % (2,261) 2 %\\nTOTAL NIKE, INC. EARNINGS BEFORE INTEREST ANDTAXES $ 6,195 $ 6,856 -10 % $ 6,923 -1 %\\nEBIT margin 12.1 % 14.7 % 15.5 %\\nInterest expense (income), net (6) 205 \\u2014 262 \\u2014 \\nTOTAL NIKE, INC. INCOME BEFORE INCOME TAXES $ 6,201 $ 6,651 -7 % $ 6,661 0 %\\n(1) Total NIKE Brand EBIT, Total NIKE, Inc. EBIT and EBIT Margin represent non-GAAP financial measures. See \\\"Use of Non-GAAP Financial Measures\\\" for further information.\\n(1) (1)\\n(2)\\n(3)\\n(4)\\n(1)\\n(1)\\n(1)\\n2023 FORM 10-K 36\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "" ], "text/plain": [ " id vector_distance content\n", - "0 chunk:88 0.337694585323 Asia Pacific & Latin America 1,932 1,896 2 % 1...\n", - "1 chunk:80 0.342052936554 Table of Contents\\nCONSOLIDATED OPERATING RESU...\n", - "2 chunk:87 0.35776078701 Table of Contents\\nOPERATING SEGMENTS\\nAs disc...\n", - "3 chunk:83 0.37876611948 Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 C..." + "0 chunk:87 0.334264576435 Asia Pacific & Latin America 1,932 1,896 2 % 1...\n", + "1 chunk:79 0.350993335247 Table of Contents\\nCONSOLIDA TED OPERA TING RE...\n", + "2 chunk:86 0.371814072132 Table of Contents\\nOPERA TING SEGMENTS\\nAs dis...\n", + "3 chunk:82 0.378765702248 Table of Contents\\nGROSS MARGIN\\nFISCAL 2023 C..." ] }, - "execution_count": 20, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -2603,7 +1297,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2611,25 +1305,11 @@ "id": "_esLGYzbT6LG", "outputId": "d3314a08-8746-4239-dcb2-e7e41b51c640" }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "from redis.asyncio import Redis as AsyncRedis\n", "from redisvl.index import AsyncSearchIndex\n", "\n", - "client = AsyncRedis.from_url(REDIS_URL)\n", - "async_index = AsyncSearchIndex.from_dict(schema)\n", - "await async_index.set_client(client)" + "async_index = AsyncSearchIndex.from_dict(schema, redis_url=REDIS_URL)" ] }, { @@ -2643,7 +1323,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2651,15 +1331,7 @@ "id": "EgdTvz6zJkED", "outputId": "d2ab0e8e-2ecf-458d-881d-6e4658953a71" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OPENAI_API_KEY :··········\n" - ] - } - ], + 
"outputs": [], "source": [ "import openai\n", "import os\n", @@ -2689,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": { "id": "1V1Tio4-ZjmA" }, @@ -2764,7 +1436,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": { "id": "pn-PoACdbihY" }, @@ -2782,7 +1454,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2848,89 +1520,7 @@ "id": "9M_iU6_hbv0J", "outputId": "b9fc43d9-883a-4795-8a37-8a2f4c545892" }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "22178a562935411f88cad67659ebb7c4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Batches: 0%| | 0/1 [00:00 \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -q redis \"unstructured[pdf]\" sentence-transformers langchain \n", - "%pip install -q langchain-community langchain-redis langchain-huggingface langchain-openai" + "%pip install -q langchain-community \"langchain-redis>=0.2.0\" langchain-huggingface langchain-openai" ] }, { @@ -185,7 +226,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Listing available documents ... ['resources/eval_dataset_1000_0.json', 'resources/nke-10k-2023.pdf', 'resources/amzn-10k-2023.pdf', 'resources/metrics_2500_0.csv', 'resources/jnj-10k-2023.pdf', 'resources/new_testset.csv', 'resources/aapl-10k-2023.pdf', 'resources/testset_15.csv', 'resources/retrieval_basic_rag_test.csv', 'resources/nvd-10k-2023.pdf', 'resources/msft-10k-2023.pdf', 'resources/propositions.json', 'resources/generation_basic_rag_test.csv']\n" + "Listing available documents ... 
['resources/nke-10k-2023.pdf', 'resources/amzn-10k-2023.pdf', 'resources/metrics_2500_0.csv', 'resources/jnj-10k-2023.pdf', 'resources/aapl-10k-2023.pdf', 'resources/testset_15.csv', 'resources/retrieval_basic_rag_test.csv', 'resources/2022-chevy-colorado-ebrochure.pdf', 'resources/nvd-10k-2023.pdf', 'resources/testset.csv', 'resources/msft-10k-2023.pdf', 'resources/propositions.json', 'resources/generation_basic_rag_test.csv']\n" ] } ], @@ -205,11 +246,19 @@ "execution_count": 3, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/_g/rr4lnxxx1_z7m78lz89dhvsm0000gp/T/ipykernel_45325/1931079106.py:8: LangChainDeprecationWarning: The class `UnstructuredFileLoader` was deprecated in LangChain 0.2.8 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-unstructured package and should be used instead. To use it run `pip install -U :class:`~langchain-unstructured` and import as `from :class:`~langchain_unstructured import UnstructuredLoader``.\n", + " loader = UnstructuredFileLoader(\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Done preprocessing. Created 180 chunks of the original pdf resources/nke-10k-2023.pdf\n" + "Done preprocessing. 
Created 179 chunks of the original pdf resources/nke-10k-2023.pdf\n" ] } ], @@ -439,7 +488,15 @@ "id": "yY69FViAjNv1", "outputId": "ab7b212b-3c55-44b1-cf72-6eb926cf302f" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:18:04 redisvl.index.index INFO Index already exists, not overwriting.\n" + ] + } + ], "source": [ "from langchain_redis import RedisVectorStore\n", "\n", @@ -474,7 +531,7 @@ { "data": { "text/plain": [ - "180" + "1123" ] }, "execution_count": 6, @@ -499,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "id": "Gv6SxKOB1vmy" }, @@ -510,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -523,16 +580,16 @@ "data": { "text/plain": [ "[(Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"(Dollars in millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit Gross margin\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense % of revenues\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense Effective tax rate\\n\\nNET INCOME Diluted earnings per common share\\n\\n$\\n\\n$ $\\n\\nFISCAL 2023\\n\\n51,217 28,925\\n\\n22,292\\n\\n43.5 %\\n\\n4,060 12,317\\n\\n16,377\\n\\n32.0 % (6)\\n\\n(280) 6,201\\n\\n1,131\\n\\n18.2 %\\n\\n5,070 3.23\\n\\n$\\n\\n$ $\\n\\nFISCAL 2022\\n\\n46,710 25,231\\n\\n21,479\\n\\n46.0 %\\n\\n3,850 10,954\\n\\n14,804\\n\\n31.7 % 205\\n\\n(181) 6,651\\n\\n605 9.1 %\\n\\n6,046 3.75\\n\\n% CHANGE\\n\\n10 % $ 15 %\\n\\n4 %\\n\\n5 % 12 %\\n\\n11 %\\n\\n—\\n\\n— -7 %\\n\\n87 %\\n\\n16 % $ -14 % $\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\n44,538 24,576\\n\\n5 % 3 %\\n\\n19,962\\n\\n8 %\\n\\n44.8 %\\n\\n3,114 9,911\\n\\n24 % 11 %\\n\\n13,025\\n\\n14 %\\n\\n29.2 % 262\\n\\n—\\n\\n14 6,661\\n\\n— 
0 %\\n\\n934 14.0 %\\n\\n35 %\\n\\n5,727 3.56\\n\\n6 % 5 %\\n\\n2023 FORM 10-K 31\\n\\nTable of Contents\\n\\nCONSOLIDATED OPERATING RESULTS REVENUES\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\nNIKE, Inc. Revenues:\\n\\nNIKE Brand Revenues by:\\n\\nFootwear Apparel\\n\\n$\\n\\n33,135 $ 13,843\\n\\n29,143 13,567\\n\\n14 % 2 %\\n\\n20 % $ 8 %\\n\\n28,021 12,865\\n\\n4 % 5 %\\n\\nEquipment Global Brand Divisions\\n\\n(2)\\n\\nTotal NIKE Brand Revenues\\n\\n$\\n\\n1,727 58\\n\\n48,763 $\\n\\n1,624 102 44,436\\n\\n6 % -43 % 10 %\\n\\n13 % -43 % 16 % $\\n\\n1,382 25 42,293\\n\\n18 % 308 % 5 %\\n\\nConverse Corporate\\n\\n(3)\\n\\n2,427 27\\n\\n2,346 (72)\\n\\n3 % —\\n\\n8 % —\\n\\n2,205 40\\n\\n6 % —\\n\\nTOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n51,217 $\\n\\n46,710\\n\\n10 %\\n\\n16 % $\\n\\n44,538\\n\\n5 %\\n\\nSupplemental NIKE Brand Revenues Details: NIKE Brand Revenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n27,397 $\\n\\n25,608\\n\\n7 %\\n\\n14 % $\\n\\n25,898\\n\\n1 %\\n\\nSales through NIKE Direct Global Brand Divisions\\n\\n(2)\\n\\n21,308 58\\n\\n18,726 102\\n\\n14 % -43 %\\n\\n20 % -43 %\\n\\n16,370 25\\n\\n14 % 308 %\\n\\nTOTAL NIKE BRAND REVENUES (1) NIKE Brand Revenues on a Wholesale Equivalent Basis :\\n\\n$\\n\\n48,763 $\\n\\n44,436\\n\\n10 %\\n\\n16 % $\\n\\n42,293\\n\\n5 %\\n\\nSales to Wholesale Customers Sales from our Wholesale Operations to NIKE Direct Operations\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES NIKE Brand Wholesale Equivalent Revenues by:\\n\\n(1),(4)\\n\\n$\\n\\n$\\n\\n27,397 $ 12,730\\n\\n40,127 $\\n\\n25,608 10,543\\n\\n36,151\\n\\n7 % 21 %\\n\\n11 %\\n\\n14 % $ 27 %\\n\\n18 % $\\n\\n25,898 9,872\\n\\n35,770\\n\\n1 % 7 % 1 %\\n\\nMen's Women's NIKE Kids'\\n\\n$\\n\\n20,733 $ 8,606 5,038\\n\\n18,797 8,273 4,874\\n\\n10 % 4 % 3 %\\n\\n17 % $ 11 % 10 %\\n\\n18,391 8,225 4,882\\n\\n2 % 1 % 0 %\\n\\nJordan Brand 
(5) Others\\n\\n6,589 (839)\\n\\n5,122 (915)\\n\\n29 % 8 %\\n\\n35 % -3 %\"),\n", - " 0.49901175499),\n", + " 0.499011814594),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"(Dollars in millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit Gross margin\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense % of revenues\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense Effective tax rate\\n\\nNET INCOME Diluted earnings per common share\\n\\n$\\n\\n$ $\\n\\nFISCAL 2023\\n\\n51,217 28,925\\n\\n22,292\\n\\n43.5 %\\n\\n4,060 12,317\\n\\n16,377\\n\\n32.0 % (6)\\n\\n(280) 6,201\\n\\n1,131\\n\\n18.2 %\\n\\n5,070 3.23\\n\\n$\\n\\n$ $\\n\\nFISCAL 2022\\n\\n46,710 25,231\\n\\n21,479\\n\\n46.0 %\\n\\n3,850 10,954\\n\\n14,804\\n\\n31.7 % 205\\n\\n(181) 6,651\\n\\n605 9.1 %\\n\\n6,046 3.75\\n\\n% CHANGE\\n\\n10 % $ 15 %\\n\\n4 %\\n\\n5 % 12 %\\n\\n11 %\\n\\n—\\n\\n— -7 %\\n\\n87 %\\n\\n16 % $ -14 % $\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\n44,538 24,576\\n\\n5 % 3 %\\n\\n19,962\\n\\n8 %\\n\\n44.8 %\\n\\n3,114 9,911\\n\\n24 % 11 %\\n\\n13,025\\n\\n14 %\\n\\n29.2 % 262\\n\\n—\\n\\n14 6,661\\n\\n— 0 %\\n\\n934 14.0 %\\n\\n35 %\\n\\n5,727 3.56\\n\\n6 % 5 %\\n\\n2023 FORM 10-K 31\\n\\nTable of Contents\\n\\nCONSOLIDATED OPERATING RESULTS REVENUES\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\nNIKE, Inc. 
Revenues:\\n\\nNIKE Brand Revenues by:\\n\\nFootwear Apparel\\n\\n$\\n\\n33,135 $ 13,843\\n\\n29,143 13,567\\n\\n14 % 2 %\\n\\n20 % $ 8 %\\n\\n28,021 12,865\\n\\n4 % 5 %\\n\\nEquipment Global Brand Divisions\\n\\n(2)\\n\\nTotal NIKE Brand Revenues\\n\\n$\\n\\n1,727 58\\n\\n48,763 $\\n\\n1,624 102 44,436\\n\\n6 % -43 % 10 %\\n\\n13 % -43 % 16 % $\\n\\n1,382 25 42,293\\n\\n18 % 308 % 5 %\\n\\nConverse Corporate\\n\\n(3)\\n\\n2,427 27\\n\\n2,346 (72)\\n\\n3 % —\\n\\n8 % —\\n\\n2,205 40\\n\\n6 % —\\n\\nTOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n51,217 $\\n\\n46,710\\n\\n10 %\\n\\n16 % $\\n\\n44,538\\n\\n5 %\\n\\nSupplemental NIKE Brand Revenues Details: NIKE Brand Revenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n27,397 $\\n\\n25,608\\n\\n7 %\\n\\n14 % $\\n\\n25,898\\n\\n1 %\\n\\nSales through NIKE Direct Global Brand Divisions\\n\\n(2)\\n\\n21,308 58\\n\\n18,726 102\\n\\n14 % -43 %\\n\\n20 % -43 %\\n\\n16,370 25\\n\\n14 % 308 %\\n\\nTOTAL NIKE BRAND REVENUES (1) NIKE Brand Revenues on a Wholesale Equivalent Basis :\\n\\n$\\n\\n48,763 $\\n\\n44,436\\n\\n10 %\\n\\n16 % $\\n\\n42,293\\n\\n5 %\\n\\nSales to Wholesale Customers Sales from our Wholesale Operations to NIKE Direct Operations\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES NIKE Brand Wholesale Equivalent Revenues by:\\n\\n(1),(4)\\n\\n$\\n\\n$\\n\\n27,397 $ 12,730\\n\\n40,127 $\\n\\n25,608 10,543\\n\\n36,151\\n\\n7 % 21 %\\n\\n11 %\\n\\n14 % $ 27 %\\n\\n18 % $\\n\\n25,898 9,872\\n\\n35,770\\n\\n1 % 7 % 1 %\\n\\nMen's Women's NIKE Kids'\\n\\n$\\n\\n20,733 $ 8,606 5,038\\n\\n18,797 8,273 4,874\\n\\n10 % 4 % 3 %\\n\\n17 % $ 11 % 10 %\\n\\n18,391 8,225 4,882\\n\\n2 % 1 % 0 %\\n\\nJordan Brand (5) Others\\n\\n6,589 (839)\\n\\n5,122 (915)\\n\\n29 % 8 %\\n\\n35 % -3 %\"),\n", + " 0.499011814594),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"(Dollars in millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit Gross margin\\n\\nDemand creation expense 
Operating overhead expense\\n\\nTotal selling and administrative expense % of revenues\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense Effective tax rate\\n\\nNET INCOME Diluted earnings per common share\\n\\n$\\n\\n$ $\\n\\nFISCAL 2023\\n\\n51,217 28,925\\n\\n22,292\\n\\n43.5 %\\n\\n4,060 12,317\\n\\n16,377\\n\\n32.0 % (6)\\n\\n(280) 6,201\\n\\n1,131\\n\\n18.2 %\\n\\n5,070 3.23\\n\\n$\\n\\n$ $\\n\\nFISCAL 2022\\n\\n46,710 25,231\\n\\n21,479\\n\\n46.0 %\\n\\n3,850 10,954\\n\\n14,804\\n\\n31.7 % 205\\n\\n(181) 6,651\\n\\n605 9.1 %\\n\\n6,046 3.75\\n\\n% CHANGE\\n\\n10 % $ 15 %\\n\\n4 %\\n\\n5 % 12 %\\n\\n11 %\\n\\n—\\n\\n— -7 %\\n\\n87 %\\n\\n16 % $ -14 % $\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\n44,538 24,576\\n\\n5 % 3 %\\n\\n19,962\\n\\n8 %\\n\\n44.8 %\\n\\n3,114 9,911\\n\\n24 % 11 %\\n\\n13,025\\n\\n14 %\\n\\n29.2 % 262\\n\\n—\\n\\n14 6,661\\n\\n— 0 %\\n\\n934 14.0 %\\n\\n35 %\\n\\n5,727 3.56\\n\\n6 % 5 %\\n\\n2023 FORM 10-K 31\\n\\nTable of Contents\\n\\nCONSOLIDATED OPERATING RESULTS REVENUES\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\nNIKE, Inc. Revenues:\\n\\nNIKE Brand Revenues by:\\n\\nFootwear Apparel\\n\\n$\\n\\n33,135 $ 13,843\\n\\n29,143 13,567\\n\\n14 % 2 %\\n\\n20 % $ 8 %\\n\\n28,021 12,865\\n\\n4 % 5 %\\n\\nEquipment Global Brand Divisions\\n\\n(2)\\n\\nTotal NIKE Brand Revenues\\n\\n$\\n\\n1,727 58\\n\\n48,763 $\\n\\n1,624 102 44,436\\n\\n6 % -43 % 10 %\\n\\n13 % -43 % 16 % $\\n\\n1,382 25 42,293\\n\\n18 % 308 % 5 %\\n\\nConverse Corporate\\n\\n(3)\\n\\n2,427 27\\n\\n2,346 (72)\\n\\n3 % —\\n\\n8 % —\\n\\n2,205 40\\n\\n6 % —\\n\\nTOTAL NIKE, INC. 
REVENUES\\n\\n$\\n\\n51,217 $\\n\\n46,710\\n\\n10 %\\n\\n16 % $\\n\\n44,538\\n\\n5 %\\n\\nSupplemental NIKE Brand Revenues Details: NIKE Brand Revenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n27,397 $\\n\\n25,608\\n\\n7 %\\n\\n14 % $\\n\\n25,898\\n\\n1 %\\n\\nSales through NIKE Direct Global Brand Divisions\\n\\n(2)\\n\\n21,308 58\\n\\n18,726 102\\n\\n14 % -43 %\\n\\n20 % -43 %\\n\\n16,370 25\\n\\n14 % 308 %\\n\\nTOTAL NIKE BRAND REVENUES (1) NIKE Brand Revenues on a Wholesale Equivalent Basis :\\n\\n$\\n\\n48,763 $\\n\\n44,436\\n\\n10 %\\n\\n16 % $\\n\\n42,293\\n\\n5 %\\n\\nSales to Wholesale Customers Sales from our Wholesale Operations to NIKE Direct Operations\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES NIKE Brand Wholesale Equivalent Revenues by:\\n\\n(1),(4)\\n\\n$\\n\\n$\\n\\n27,397 $ 12,730\\n\\n40,127 $\\n\\n25,608 10,543\\n\\n36,151\\n\\n7 % 21 %\\n\\n11 %\\n\\n14 % $ 27 %\\n\\n18 % $\\n\\n25,898 9,872\\n\\n35,770\\n\\n1 % 7 % 1 %\\n\\nMen's Women's NIKE Kids'\\n\\n$\\n\\n20,733 $ 8,606 5,038\\n\\n18,797 8,273 4,874\\n\\n10 % 4 % 3 %\\n\\n17 % $ 11 % 10 %\\n\\n18,391 8,225 4,882\\n\\n2 % 1 % 0 %\\n\\nJordan Brand (5) Others\\n\\n6,589 (839)\\n\\n5,122 (915)\\n\\n29 % 8 %\\n\\n35 % -3 %\"),\n", + " 0.499011814594),\n", " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Tax (expense) benefit Gain (loss) net of tax\\n\\n5 (14)\\n\\n(9) 22\\n\\nTotal net gain (loss) reclassified for the period\\n\\n$\\n\\n463 $\\n\\n30\\n\\n2023 FORM 10-K 82\\n\\nTable of Contents\\n\\nNOTE 14 — REVENUES\\n\\nDISAGGREGATION OF REVENUES The following tables present the Company's Revenues disaggregated by reportable operating segment, major product line and distribution channel:\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nYEAR ENDED MAY 31, 2023 ASIA PACIFIC & LATIN (1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nAMERICA\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, 
INC.\\n\\nRevenues by: Footwear\\n\\n$\\n\\n14,897 $\\n\\n8,260 $\\n\\n5,435 $\\n\\n4,543 $\\n\\n— $\\n\\n33,135 $\\n\\n2,155 $\\n\\n— $\\n\\n35,290\\n\\nApparel Equipment Other\\n\\n5,947 764 —\\n\\n4,566 592 —\\n\\n1,666 147 —\\n\\n1,664 224 —\\n\\n— — 58\\n\\n13,843 1,727 58\\n\\n90 28 154\\n\\n— — 27\\n\\n13,933 1,755 239\\n\\nTOTAL REVENUES\\n\\n$\\n\\n21,608 $\\n\\n13,418 $\\n\\n7,248 $\\n\\n6,431 $\\n\\n58 $\\n\\n48,763 $\\n\\n2,427 $\\n\\n27 $\\n\\n51,217\\n\\nRevenues by:\\n\\nSales to Wholesale Customers Sales through Direct to Consumer\\n\\n$\\n\\n11,273 $ 10,335\\n\\n8,522 $ 4,896\\n\\n3,866 $ 3,382\\n\\n3,736 $ 2,695\\n\\n— $ —\\n\\n27,397 $ 21,308\\n\\n1,299 $ 974\\n\\n— $ —\\n\\n28,696 22,282\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n21,608 $\\n\\n—\\n\\n13,418 $\\n\\n— 7,248 $\\n\\n— 6,431 $\\n\\n58 58 $\\n\\n58\\n\\n48,763 $\\n\\n154 2,427 $\\n\\n27 27 $\\n\\n239 51,217\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand businesses in its CASA territory to third-party distributors.\\n\\nYEAR ENDED MAY 31, 2022\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nASIA PACIFIC & LATIN AMERICA\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear Apparel\\n\\n$\\n\\n12,228 $ 5,492\\n\\n7,388 $ 4,527\\n\\n5,416 $ 1,938\\n\\n4,111 $ 1,610\\n\\n— $ —\\n\\n29,143 $ 13,567\\n\\n2,094 $ 103\\n\\n— $ —\\n\\n31,237 13,670\\n\\nEquipment Other\\n\\n633 —\\n\\n564 —\\n\\n193 —\\n\\n234 —\\n\\n— 102\\n\\n1,624 102\\n\\n26 123\\n\\n— (72)\\n\\n1,650 153\\n\\nTOTAL REVENUES Revenues by:\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\nSales to Wholesale Customers Sales through Direct to Consumer Other\\n\\n$\\n\\n9,621 $ 8,732 —\\n\\n8,377 $ 4,102 —\\n\\n4,081 $ 3,466 —\\n\\n3,529 $ 
2,426 —\\n\\n— $ — 102\\n\\n25,608 $ 18,726 102\\n\\n1,292 $ 931 123\\n\\n— $ — (72)\\n\\n26,900 19,657 153\\n\\nTOTAL REVENUES\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\n2023 FORM 10-K 83\\n\\nTable of Contents\\n\\nYEAR ENDED MAY 31, 2021\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\"),\n", - " 0.529602944851),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"NIKE, INC. CONSOLIDATED STATEMENTS OF INCOME\\n\\n(In millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense NET INCOME\\n\\nEarnings per common share:\\n\\nBasic Diluted\\n\\nWeighted average common shares outstanding:\\n\\nBasic Diluted\\n\\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n\\n$\\n\\n$\\n\\n$ $\\n\\nYEAR ENDED MAY 31,\\n\\n2023\\n\\n2022\\n\\n2021\\n\\n51,217 $ 28,925\\n\\n46,710 $ 25,231\\n\\n44,538 24,576\\n\\n22,292 4,060 12,317\\n\\n21,479 3,850 10,954\\n\\n19,962 3,114 9,911\\n\\n16,377 (6)\\n\\n14,804 205\\n\\n13,025 262\\n\\n(280) 6,201\\n\\n(181) 6,651\\n\\n14 6,661\\n\\n1,131 5,070 $\\n\\n605 6,046 $\\n\\n934 5,727\\n\\n3.27 $ 3.23 $\\n\\n3.83 $ 3.75 $\\n\\n3.64 3.56\\n\\n1,551.6 1,569.8\\n\\n1,578.8 1,610.8\\n\\n1,573.0 1,609.4\\n\\n2023 FORM 10-K 55\\n\\nTable of Contents\\n\\nNIKE, INC. 
CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME\\n\\nYEAR ENDED MAY 31,\\n\\n(Dollars in millions)\\n\\n2023\\n\\n2022\\n\\nNet income Other comprehensive income (loss), net of tax:\\n\\n$\\n\\n5,070 $\\n\\n6,046 $\\n\\nChange in net foreign currency translation adjustment\\n\\n267\\n\\n(522)\\n\\nChange in net gains (losses) on cash flow hedges Change in net gains (losses) on other\\n\\n(348) (6)\\n\\n1,214 6\\n\\nTotal other comprehensive income (loss), net of tax TOTAL COMPREHENSIVE INCOME\\n\\n$\\n\\n(87) 4,983 $\\n\\n698 6,744 $\\n\\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n\\n2023 FORM 10-K 56\\n\\n2021\\n\\n5,727\\n\\n496\\n\\n(825) 5\\n\\n(324) 5,403\\n\\nTable of Contents\\n\\nNIKE, INC. CONSOLIDATED BALANCE SHEETS\\n\\n(In millions)\\n\\nASSETS\\n\\nCurrent assets:\\n\\nCash and equivalents Short-term investments\\n\\nAccounts receivable, net Inventories Prepaid expenses and other current assets\\n\\nTotal current assets\\n\\nProperty, plant and equipment, net\\n\\nOperating lease right-of-use assets, net Identifiable intangible assets, net Goodwill\\n\\nDeferred income taxes and other assets\\n\\nTOTAL ASSETS\\n\\nLIABILITIES AND SHAREHOLDERS' EQUITY Current liabilities:\\n\\nCurrent portion of long-term debt Notes payable Accounts payable\\n\\nCurrent portion of operating lease liabilities Accrued liabilities Income taxes payable\\n\\nTotal current liabilities\\n\\nLong-term debt\\n\\nOperating lease liabilities Deferred income taxes and other liabilities Commitments and contingencies (Note 16)\\n\\nRedeemable preferred stock Shareholders' equity: Common stock at stated value:\"),\n", - " 0.560668945312),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity in\\n\\nthe prior period resulting from 
lower available inventory supply;\\n\\nUnfavorable changes in net foreign currency exchange rates, including hedges; and\\n\\nLower off-price margin, on a wholesale equivalent basis.\\n\\nThis was partially offset by:\\n\\nHigher NIKE Brand full-price ASP, net of discounts, on a wholesale equivalent basis, due primarily to strategic pricing actions and product mix; and\\n\\nLower other costs, primarily due to higher inventory obsolescence reserves recognized in Greater China in the fourth quarter of fiscal 2022.\\n\\nTOTAL SELLING AND ADMINISTRATIVE EXPENSE\\n\\n(Dollars in millions)\\n\\nDemand creation expense Operating overhead expense\\n\\n(1)\\n\\n$\\n\\nFISCAL 2023 4,060 12,317\\n\\n$\\n\\nFISCAL 2022 3,850 10,954\\n\\n% CHANGE\\n\\n5 % $\\n\\n12 %\\n\\nFISCAL 2021 3,114 9,911\\n\\nTotal selling and administrative expense\\n\\n% of revenues\\n\\n$\\n\\n16,377\\n\\n32.0 %\\n\\n$\\n\\n14,804\\n\\n31.7 %\\n\\n11 % $ 30 bps\\n\\n13,025\\n\\n29.2 %\\n\\n(1) Demand creation expense consists of advertising and promotion costs, including costs of endorsement contracts, complimentary product, television, digital and print advertising and media costs, brand\\n\\nevents and retail brand presentation.\\n\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n\\nDemand creation expense increased 5% for fiscal 2023, primarily due to higher advertising and marketing expense and higher sports marketing expense. Changes in foreign currency exchange rates decreased Demand creation expense by approximately 4 percentage points.\\n\\nOperating overhead expense increased 12%, primarily due to higher wage-related expenses, NIKE Direct variable costs, strategic technology enterprise investments and other administrative costs. 
Changes in foreign currency exchange rates decreased Operating overhead expense by approximately 3 percentage points.\\n\\n2023 FORM 10-K 34\\n\\n% CHANGE\\n\\n24 % 11 %\\n\\n14 % 250 bps\\n\\nTable of Contents\\n\\nOTHER (INCOME) EXPENSE, NET\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\nFISCAL 2021\\n\\nOther (income) expense, net\\n\\n$\\n\\n(280) $\\n\\n(181) $\\n\\n14\\n\\nOther (income) expense, net comprises foreign currency conversion gains and losses from the remeasurement of monetary assets and liabilities denominated in non- functional currencies and the impact of certain foreign currency derivative instruments, as well as unusual or non-operating transactions that are outside the normal course of business.'),\n", - " 0.574473142624)]" + " 0.529603242874)]" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -544,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -557,16 +614,16 @@ "data": { "text/plain": [ "[(Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"(Dollars in millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit Gross margin\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense % of revenues\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense Effective tax rate\\n\\nNET INCOME Diluted earnings per common share\\n\\n$\\n\\n$ $\\n\\nFISCAL 2023\\n\\n51,217 28,925\\n\\n22,292\\n\\n43.5 %\\n\\n4,060 12,317\\n\\n16,377\\n\\n32.0 % (6)\\n\\n(280) 6,201\\n\\n1,131\\n\\n18.2 %\\n\\n5,070 3.23\\n\\n$\\n\\n$ $\\n\\nFISCAL 2022\\n\\n46,710 25,231\\n\\n21,479\\n\\n46.0 %\\n\\n3,850 10,954\\n\\n14,804\\n\\n31.7 % 205\\n\\n(181) 6,651\\n\\n605 9.1 %\\n\\n6,046 3.75\\n\\n% CHANGE\\n\\n10 % $ 15 %\\n\\n4 %\\n\\n5 % 12 
%\\n\\n11 %\\n\\n—\\n\\n— -7 %\\n\\n87 %\\n\\n16 % $ -14 % $\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\n44,538 24,576\\n\\n5 % 3 %\\n\\n19,962\\n\\n8 %\\n\\n44.8 %\\n\\n3,114 9,911\\n\\n24 % 11 %\\n\\n13,025\\n\\n14 %\\n\\n29.2 % 262\\n\\n—\\n\\n14 6,661\\n\\n— 0 %\\n\\n934 14.0 %\\n\\n35 %\\n\\n5,727 3.56\\n\\n6 % 5 %\\n\\n2023 FORM 10-K 31\\n\\nTable of Contents\\n\\nCONSOLIDATED OPERATING RESULTS REVENUES\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\nNIKE, Inc. Revenues:\\n\\nNIKE Brand Revenues by:\\n\\nFootwear Apparel\\n\\n$\\n\\n33,135 $ 13,843\\n\\n29,143 13,567\\n\\n14 % 2 %\\n\\n20 % $ 8 %\\n\\n28,021 12,865\\n\\n4 % 5 %\\n\\nEquipment Global Brand Divisions\\n\\n(2)\\n\\nTotal NIKE Brand Revenues\\n\\n$\\n\\n1,727 58\\n\\n48,763 $\\n\\n1,624 102 44,436\\n\\n6 % -43 % 10 %\\n\\n13 % -43 % 16 % $\\n\\n1,382 25 42,293\\n\\n18 % 308 % 5 %\\n\\nConverse Corporate\\n\\n(3)\\n\\n2,427 27\\n\\n2,346 (72)\\n\\n3 % —\\n\\n8 % —\\n\\n2,205 40\\n\\n6 % —\\n\\nTOTAL NIKE, INC. 
REVENUES\\n\\n$\\n\\n51,217 $\\n\\n46,710\\n\\n10 %\\n\\n16 % $\\n\\n44,538\\n\\n5 %\\n\\nSupplemental NIKE Brand Revenues Details: NIKE Brand Revenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n27,397 $\\n\\n25,608\\n\\n7 %\\n\\n14 % $\\n\\n25,898\\n\\n1 %\\n\\nSales through NIKE Direct Global Brand Divisions\\n\\n(2)\\n\\n21,308 58\\n\\n18,726 102\\n\\n14 % -43 %\\n\\n20 % -43 %\\n\\n16,370 25\\n\\n14 % 308 %\\n\\nTOTAL NIKE BRAND REVENUES (1) NIKE Brand Revenues on a Wholesale Equivalent Basis :\\n\\n$\\n\\n48,763 $\\n\\n44,436\\n\\n10 %\\n\\n16 % $\\n\\n42,293\\n\\n5 %\\n\\nSales to Wholesale Customers Sales from our Wholesale Operations to NIKE Direct Operations\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES NIKE Brand Wholesale Equivalent Revenues by:\\n\\n(1),(4)\\n\\n$\\n\\n$\\n\\n27,397 $ 12,730\\n\\n40,127 $\\n\\n25,608 10,543\\n\\n36,151\\n\\n7 % 21 %\\n\\n11 %\\n\\n14 % $ 27 %\\n\\n18 % $\\n\\n25,898 9,872\\n\\n35,770\\n\\n1 % 7 % 1 %\\n\\nMen's Women's NIKE Kids'\\n\\n$\\n\\n20,733 $ 8,606 5,038\\n\\n18,797 8,273 4,874\\n\\n10 % 4 % 3 %\\n\\n17 % $ 11 % 10 %\\n\\n18,391 8,225 4,882\\n\\n2 % 1 % 0 %\\n\\nJordan Brand (5) Others\\n\\n6,589 (839)\\n\\n5,122 (915)\\n\\n29 % 8 %\\n\\n35 % -3 %\"),\n", - " 0.49901175499),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. This increase was primarily due to higher revenues in Men\\'s, the Jordan Brand, Women\\'s and Kids\\' which grew 17%, 35%,11% and 10%, respectively, on a wholesale equivalent basis.\\n\\nNIKE Brand footwear revenues increased 20% on a currency-neutral basis, due to higher revenues in Men\\'s, the Jordan Brand, Women\\'s and Kids\\'. Unit sales of footwear increased 13%, while higher average selling price (\"ASP\") per pair contributed approximately 7 percentage points of footwear revenue growth. 
Higher ASP was primarily due to higher full-price ASP, net of discounts, on a wholesale equivalent basis, and growth in the size of our NIKE Direct business, partially offset by lower NIKE Direct ASP.\\n\\nNIKE Brand apparel revenues increased 8% on a currency-neutral basis, primarily due to higher revenues in Men\\'s. Unit sales of apparel increased 4%, while higher ASP per unit contributed approximately 4 percentage points of apparel revenue growth. Higher ASP was primarily due to higher full-price ASP and growth in the size of our NIKE Direct business, partially offset by lower NIKE Direct ASP, reflecting higher promotional activity.\\n\\nNIKE Direct revenues increased 14% from $18.7 billion in fiscal 2022 to $21.3 billion in fiscal 2023. On a currency-neutral basis, NIKE Direct revenues increased 20% primarily driven by NIKE Brand Digital sales growth of 24%, comparable store sales growth of 14% and the addition of new stores. For further information regarding comparable store sales, including the definition, see \"Comparable Store Sales\". NIKE Brand Digital sales were $12.6 billion for fiscal 2023 compared to $10.7 billion for fiscal 2022.\\n\\n2023 FORM 10-K 33\\n\\nTable of Contents\\n\\nGROSS MARGIN FISCAL 2023 COMPARED TO FISCAL 2022\\n\\nFor fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. 
Gross margin decreased 250 basis points to 43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:\\n\\nWholesale equivalent\\n\\nThe decrease in gross margin for fiscal 2023 was primarily due to:\\n\\nHigher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as\\n\\nproduct mix;'),\n", - " 0.650711655617),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='131.10 115.56 126.97\\n\\n(1) Includes an immaterial amount of PSU transactions\\n\\nThe weighted average fair value per share of restricted stock and restricted stock units granted for the fiscal years ended May 31, 2023, 2022 and 2021, computed as of the grant date, was $115.56, $168.04 and $113.84, respectively. During the fiscal years ended May 31, 2023, 2022 and 2021, the aggregate fair value of vested restricted stock and restricted stock units was $250 million, $354 million and $310 million, respectively, computed as of the date of vesting.\\n\\nAs of May 31, 2023, the Company had $649 million of unrecognized compensation costs from restricted stock and restricted stock units, net of estimated forfeitures, to be recognized in Cost of sales or Operating overhead expense, as applicable, over a weighted average remaining period of 2.3 years.\\n\\n2023 FORM 10-K 76\\n\\nTable of Contents\\n\\nNOTE 10 — EARNINGS PER SHARE\\n\\nThe following is a reconciliation from basic earnings per common share to diluted earnings per common share. 
The computations of diluted earnings per common share excluded restricted stock, restricted stock units and options, including shares under ESPPs, to purchase an estimated additional 31.7 million, 9.4 million and 11.3 million shares of common stock outstanding for the fiscal years ended May 31, 2023, 2022 and 2021, respectively, because the awards were assumed to be anti-dilutive.\\n\\nYEAR ENDED MAY 31,\\n\\n(In millions, except per share data)\\n\\n2023\\n\\n2022\\n\\n2021\\n\\nNet income available to common stockholders\\n\\n$\\n\\n5,070 $\\n\\n6,046 $\\n\\n5,727\\n\\nDetermination of shares:\\n\\nWeighted average common shares outstanding Assumed conversion of dilutive stock options and awards\\n\\n1,551.6 18.2\\n\\n1,578.8 32.0\\n\\n1,573.0 36.4\\n\\nDILUTED WEIGHTED AVERAGE COMMON SHARES OUTSTANDING\\n\\n1,569.8\\n\\n1,610.8\\n\\n1,609.4\\n\\nEarnings per common share:\\n\\nBasic Diluted\\n\\n$ $\\n\\n3.27 $ 3.23 $\\n\\n3.83 $ 3.75 $\\n\\n3.64 3.56\\n\\nNOTE 11 — BENEFIT PLANS\\n\\nThe Company has a qualified 401(k) Savings and Profit Sharing Plan, in which all U.S. employees are able to participate. The Company matches a portion of employee contributions to the savings plan. Company contributions to the savings plan were $136 million, $126 million and $110 million and included in Cost of sales or Operating overhead expense, as applicable, for the fiscal years ended May 31, 2023, 2022 and 2021, respectively.'),\n", - " 0.689424514771),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='Because contract manufacturers make a majority of our products outside of our principal sales markets, our products must be transported by third parties over large geographic distances. 
Delays in the shipment or delivery of our products due to the availability of transportation, container shortages, labor shortages, including work stoppages or port strikes, infrastructure and port congestion or other factors, and costs and delays associated with consolidating or transitioning between manufacturers, have adversely impacted, and could in the future adversely impact the availability of our products and, in turn, our financial performance. In addition, delays in the shipment or delivery of our products, manufacturing delays or unexpected demand for our products have required us, and may in the future require us to use faster, but more expensive, transportation methods such as air freight, which could adversely affect our profit margins. The cost of oil is a significant component in manufacturing and transportation costs, so increases in the price of petroleum products can adversely affect our profit margins. Changes in U.S. trade policies, including modifications to import tariffs and existing trade policies and agreements, have also had, and could continue to have a significant impact on our activities in foreign jurisdictions, and could adversely affect our reputation or results of operations.\\n\\nOur success depends on our global distribution facilities.'),\n", - " 0.73232448101)]" + " 0.499011814594),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"(Dollars in millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit Gross margin\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense % of revenues\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense Effective tax rate\\n\\nNET INCOME Diluted earnings per common share\\n\\n$\\n\\n$ $\\n\\nFISCAL 2023\\n\\n51,217 28,925\\n\\n22,292\\n\\n43.5 %\\n\\n4,060 12,317\\n\\n16,377\\n\\n32.0 % (6)\\n\\n(280) 6,201\\n\\n1,131\\n\\n18.2 
%\\n\\n5,070 3.23\\n\\n$\\n\\n$ $\\n\\nFISCAL 2022\\n\\n46,710 25,231\\n\\n21,479\\n\\n46.0 %\\n\\n3,850 10,954\\n\\n14,804\\n\\n31.7 % 205\\n\\n(181) 6,651\\n\\n605 9.1 %\\n\\n6,046 3.75\\n\\n% CHANGE\\n\\n10 % $ 15 %\\n\\n4 %\\n\\n5 % 12 %\\n\\n11 %\\n\\n—\\n\\n— -7 %\\n\\n87 %\\n\\n16 % $ -14 % $\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\n44,538 24,576\\n\\n5 % 3 %\\n\\n19,962\\n\\n8 %\\n\\n44.8 %\\n\\n3,114 9,911\\n\\n24 % 11 %\\n\\n13,025\\n\\n14 %\\n\\n29.2 % 262\\n\\n—\\n\\n14 6,661\\n\\n— 0 %\\n\\n934 14.0 %\\n\\n35 %\\n\\n5,727 3.56\\n\\n6 % 5 %\\n\\n2023 FORM 10-K 31\\n\\nTable of Contents\\n\\nCONSOLIDATED OPERATING RESULTS REVENUES\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\nNIKE, Inc. Revenues:\\n\\nNIKE Brand Revenues by:\\n\\nFootwear Apparel\\n\\n$\\n\\n33,135 $ 13,843\\n\\n29,143 13,567\\n\\n14 % 2 %\\n\\n20 % $ 8 %\\n\\n28,021 12,865\\n\\n4 % 5 %\\n\\nEquipment Global Brand Divisions\\n\\n(2)\\n\\nTotal NIKE Brand Revenues\\n\\n$\\n\\n1,727 58\\n\\n48,763 $\\n\\n1,624 102 44,436\\n\\n6 % -43 % 10 %\\n\\n13 % -43 % 16 % $\\n\\n1,382 25 42,293\\n\\n18 % 308 % 5 %\\n\\nConverse Corporate\\n\\n(3)\\n\\n2,427 27\\n\\n2,346 (72)\\n\\n3 % —\\n\\n8 % —\\n\\n2,205 40\\n\\n6 % —\\n\\nTOTAL NIKE, INC. 
REVENUES\\n\\n$\\n\\n51,217 $\\n\\n46,710\\n\\n10 %\\n\\n16 % $\\n\\n44,538\\n\\n5 %\\n\\nSupplemental NIKE Brand Revenues Details: NIKE Brand Revenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n27,397 $\\n\\n25,608\\n\\n7 %\\n\\n14 % $\\n\\n25,898\\n\\n1 %\\n\\nSales through NIKE Direct Global Brand Divisions\\n\\n(2)\\n\\n21,308 58\\n\\n18,726 102\\n\\n14 % -43 %\\n\\n20 % -43 %\\n\\n16,370 25\\n\\n14 % 308 %\\n\\nTOTAL NIKE BRAND REVENUES (1) NIKE Brand Revenues on a Wholesale Equivalent Basis :\\n\\n$\\n\\n48,763 $\\n\\n44,436\\n\\n10 %\\n\\n16 % $\\n\\n42,293\\n\\n5 %\\n\\nSales to Wholesale Customers Sales from our Wholesale Operations to NIKE Direct Operations\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES NIKE Brand Wholesale Equivalent Revenues by:\\n\\n(1),(4)\\n\\n$\\n\\n$\\n\\n27,397 $ 12,730\\n\\n40,127 $\\n\\n25,608 10,543\\n\\n36,151\\n\\n7 % 21 %\\n\\n11 %\\n\\n14 % $ 27 %\\n\\n18 % $\\n\\n25,898 9,872\\n\\n35,770\\n\\n1 % 7 % 1 %\\n\\nMen's Women's NIKE Kids'\\n\\n$\\n\\n20,733 $ 8,606 5,038\\n\\n18,797 8,273 4,874\\n\\n10 % 4 % 3 %\\n\\n17 % $ 11 % 10 %\\n\\n18,391 8,225 4,882\\n\\n2 % 1 % 0 %\\n\\nJordan Brand (5) Others\\n\\n6,589 (839)\\n\\n5,122 (915)\\n\\n29 % 8 %\\n\\n35 % -3 %\"),\n", + " 0.499011814594),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"(Dollars in millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit Gross margin\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense % of revenues\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense Effective tax rate\\n\\nNET INCOME Diluted earnings per common share\\n\\n$\\n\\n$ $\\n\\nFISCAL 2023\\n\\n51,217 28,925\\n\\n22,292\\n\\n43.5 %\\n\\n4,060 12,317\\n\\n16,377\\n\\n32.0 % (6)\\n\\n(280) 6,201\\n\\n1,131\\n\\n18.2 %\\n\\n5,070 3.23\\n\\n$\\n\\n$ $\\n\\nFISCAL 2022\\n\\n46,710 
25,231\\n\\n21,479\\n\\n46.0 %\\n\\n3,850 10,954\\n\\n14,804\\n\\n31.7 % 205\\n\\n(181) 6,651\\n\\n605 9.1 %\\n\\n6,046 3.75\\n\\n% CHANGE\\n\\n10 % $ 15 %\\n\\n4 %\\n\\n5 % 12 %\\n\\n11 %\\n\\n—\\n\\n— -7 %\\n\\n87 %\\n\\n16 % $ -14 % $\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\n44,538 24,576\\n\\n5 % 3 %\\n\\n19,962\\n\\n8 %\\n\\n44.8 %\\n\\n3,114 9,911\\n\\n24 % 11 %\\n\\n13,025\\n\\n14 %\\n\\n29.2 % 262\\n\\n—\\n\\n14 6,661\\n\\n— 0 %\\n\\n934 14.0 %\\n\\n35 %\\n\\n5,727 3.56\\n\\n6 % 5 %\\n\\n2023 FORM 10-K 31\\n\\nTable of Contents\\n\\nCONSOLIDATED OPERATING RESULTS REVENUES\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\nFISCAL 2021\\n\\n% CHANGE\\n\\nNIKE, Inc. Revenues:\\n\\nNIKE Brand Revenues by:\\n\\nFootwear Apparel\\n\\n$\\n\\n33,135 $ 13,843\\n\\n29,143 13,567\\n\\n14 % 2 %\\n\\n20 % $ 8 %\\n\\n28,021 12,865\\n\\n4 % 5 %\\n\\nEquipment Global Brand Divisions\\n\\n(2)\\n\\nTotal NIKE Brand Revenues\\n\\n$\\n\\n1,727 58\\n\\n48,763 $\\n\\n1,624 102 44,436\\n\\n6 % -43 % 10 %\\n\\n13 % -43 % 16 % $\\n\\n1,382 25 42,293\\n\\n18 % 308 % 5 %\\n\\nConverse Corporate\\n\\n(3)\\n\\n2,427 27\\n\\n2,346 (72)\\n\\n3 % —\\n\\n8 % —\\n\\n2,205 40\\n\\n6 % —\\n\\nTOTAL NIKE, INC. 
REVENUES\\n\\n$\\n\\n51,217 $\\n\\n46,710\\n\\n10 %\\n\\n16 % $\\n\\n44,538\\n\\n5 %\\n\\nSupplemental NIKE Brand Revenues Details: NIKE Brand Revenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n27,397 $\\n\\n25,608\\n\\n7 %\\n\\n14 % $\\n\\n25,898\\n\\n1 %\\n\\nSales through NIKE Direct Global Brand Divisions\\n\\n(2)\\n\\n21,308 58\\n\\n18,726 102\\n\\n14 % -43 %\\n\\n20 % -43 %\\n\\n16,370 25\\n\\n14 % 308 %\\n\\nTOTAL NIKE BRAND REVENUES (1) NIKE Brand Revenues on a Wholesale Equivalent Basis :\\n\\n$\\n\\n48,763 $\\n\\n44,436\\n\\n10 %\\n\\n16 % $\\n\\n42,293\\n\\n5 %\\n\\nSales to Wholesale Customers Sales from our Wholesale Operations to NIKE Direct Operations\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES NIKE Brand Wholesale Equivalent Revenues by:\\n\\n(1),(4)\\n\\n$\\n\\n$\\n\\n27,397 $ 12,730\\n\\n40,127 $\\n\\n25,608 10,543\\n\\n36,151\\n\\n7 % 21 %\\n\\n11 %\\n\\n14 % $ 27 %\\n\\n18 % $\\n\\n25,898 9,872\\n\\n35,770\\n\\n1 % 7 % 1 %\\n\\nMen's Women's NIKE Kids'\\n\\n$\\n\\n20,733 $ 8,606 5,038\\n\\n18,797 8,273 4,874\\n\\n10 % 4 % 3 %\\n\\n17 % $ 11 % 10 %\\n\\n18,391 8,225 4,882\\n\\n2 % 1 % 0 %\\n\\nJordan Brand (5) Others\\n\\n6,589 (839)\\n\\n5,122 (915)\\n\\n29 % 8 %\\n\\n35 % -3 %\"),\n", + " 0.499011814594),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='From time to time, we may invest in technology, business infrastructure, new businesses or capabilities, product offering and manufacturing innovation and expansion of existing businesses, such as our NIKE Direct operations, which require substantial cash investments and management attention. We believe cost-effective investments are essential to business growth and profitability; however, significant investments are subject to typical risks and uncertainties inherent in developing a new business or expanding an existing business. 
The failure of any significant investment to provide expected returns or profitability could have a material adverse effect on our financial results and divert management attention from more profitable business operations. See also \"Our NIKE Direct operations have required and will continue to require a substantial investment and commitment of resources and are subject to numerous risks and uncertainties.\"\\n\\nThe sale of a large number of shares of common stock by our principal shareholder could depress the market price of our common stock.\\n\\nAs of June 30, 2023, Swoosh, LLC beneficially owned approximately 77% of our Class A Common Stock. If, on June 30, 2023, all of these shares were converted into Class B Common Stock, Swoosh, LLC\\'s commensurate ownership percentage of our Class B Common Stock would be approximately 16%. The shares are available for resale, subject to the requirements of the U.S. securities laws and the terms of the limited liability company agreement governing Swoosh, LLC. The sale or prospect of a sale of a substantial number of these shares could have an adverse effect on the market price of our common stock. Swoosh, LLC was formed by Philip H. Knight, our Chairman Emeritus, to hold the majority of his shares of Class A Common Stock. Mr. 
Knight does not have voting rights with respect to Swoosh, LLC, although Travis Knight, his son and a NIKE director, has a significant role in the management of the Class A Common Stock owned by Swoosh, LLC.\\n\\nChanges in our credit ratings or macroeconomic conditions may affect our liquidity, increasing borrowing costs and limiting our financing options.'),\n", + " 0.604557394981)]" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -579,7 +636,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -591,17 +648,17 @@ { "data": { "text/plain": [ - "[(Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='4,780 (508)\\n\\n7 % -80 %\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES\\n\\n$\\n\\n40,127 $\\n\\n36,151\\n\\n11 %\\n\\n18 % $\\n\\n35,770\\n\\n1 %\\n\\n(1)\\n\\nThe percent change excluding currency changes and the presentation of wholesale equivalent revenues represent non-GAAP financial measures. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(3) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\n(4)\\n\\nAs a result of the Consumer Direct Acceleration strategy, announced in fiscal 2021, the Company is now organized around a consumer construct of Men\\'s, Women\\'s and Kids\\'. Beginning in the first quarter of fiscal 2022, unisex products are classified within Men\\'s, and Jordan Brand revenues are separately reported. 
Certain prior year amounts were reclassified to conform to fiscal 2022 presentation. These changes had no impact on previously reported consolidated results of operations or shareholders\\' equity.\\n\\n(5) Others include products not allocated to Men\\'s, Women\\'s, NIKE Kids\\' and Jordan Brand, as well as certain adjustments that are not allocated to products designated by consumer.\\n\\n2023 FORM 10-K 32\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\n4 % 6 %\\n\\n18 % 302 % 6 %\\n\\n7 % —\\n\\n6 %\\n\\n1 %\\n\\n15 % 302 %\\n\\n6 %\\n\\n1 % 7 % 1 %\\n\\n3 % 1 % 0 %\\n\\n7 % -79 %\\n\\n1 %\\n\\nTable of Contents\\n\\nFISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS The following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:\\n\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n\\nNIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively. The increase was due to higher revenues in North America, Europe, Middle East & Africa (\"EMEA\"), APLA and Greater China, which contributed approximately 7, 6, 2 and 1 percentage points to NIKE, Inc. Revenues, respectively.'),\n", - " 0.28352534771),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. This increase was primarily due to higher revenues in Men\\'s, the Jordan Brand, Women\\'s and Kids\\' which grew 17%, 35%,11% and 10%, respectively, on a wholesale equivalent basis.\\n\\nNIKE Brand footwear revenues increased 20% on a currency-neutral basis, due to higher revenues in Men\\'s, the Jordan Brand, Women\\'s and Kids\\'. 
Unit sales of footwear increased 13%, while higher average selling price (\"ASP\") per pair contributed approximately 7 percentage points of footwear revenue growth. Higher ASP was primarily due to higher full-price ASP, net of discounts, on a wholesale equivalent basis, and growth in the size of our NIKE Direct business, partially offset by lower NIKE Direct ASP.\\n\\nNIKE Brand apparel revenues increased 8% on a currency-neutral basis, primarily due to higher revenues in Men\\'s. Unit sales of apparel increased 4%, while higher ASP per unit contributed approximately 4 percentage points of apparel revenue growth. Higher ASP was primarily due to higher full-price ASP and growth in the size of our NIKE Direct business, partially offset by lower NIKE Direct ASP, reflecting higher promotional activity.\\n\\nNIKE Direct revenues increased 14% from $18.7 billion in fiscal 2022 to $21.3 billion in fiscal 2023. On a currency-neutral basis, NIKE Direct revenues increased 20% primarily driven by NIKE Brand Digital sales growth of 24%, comparable store sales growth of 14% and the addition of new stores. For further information regarding comparable store sales, including the definition, see \"Comparable Store Sales\". NIKE Brand Digital sales were $12.6 billion for fiscal 2023 compared to $10.7 billion for fiscal 2022.\\n\\n2023 FORM 10-K 33\\n\\nTable of Contents\\n\\nGROSS MARGIN FISCAL 2023 COMPARED TO FISCAL 2022\\n\\nFor fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. 
Gross margin decreased 250 basis points to 43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:\\n\\nWholesale equivalent\\n\\nThe decrease in gross margin for fiscal 2023 was primarily due to:\\n\\nHigher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as\\n\\nproduct mix;'),\n", - " 0.291597783566),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"3 % -4 %\\n\\n13 % 4 %\\n\\n1,494 190\\n\\n8 % 23 %\\n\\nTOTAL REVENUES Revenues by:\\n\\n$\\n\\n6,431 $\\n\\n5,955\\n\\n8 %\\n\\n17 % $\\n\\n5,343\\n\\n11 %\\n\\nSales to Wholesale Customers Sales through NIKE Direct\\n\\n$\\n\\n3,736 $ 2,695\\n\\n3,529 2,426\\n\\n6 % 11 %\\n\\n14 % $ 22 %\\n\\n3,387 1,956\\n\\n4 % 24 %\\n\\nTOTAL REVENUES EARNINGS BEFORE INTEREST AND TAXES\\n\\n$ $\\n\\n6,431 $ 1,932 $\\n\\n5,955 1,896\\n\\n8 % 2 %\\n\\n17 % $ $\\n\\n5,343 1,530\\n\\n11 % 24 %\\n\\nAs discussed previously, our NIKE Brand business in Brazil transitioned to a distributor operating model during fiscal 2021. We completed the sale of our entity in Chile and our entities in Argentina and Uruguay to third-party distributors in the first and second quarters of fiscal 2023, respectively. The impacts of closing these transactions are included within Corporate and are not reflected in the APLA operating segment results. This completed the transition of our NIKE Brand businesses within our CASA marketplace, which now reflects a full distributor operating model. For more information see Note 18 — Acquisitions and Divestitures within the accompanying Notes to the Consolidated Financial Statements.\\n\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n\\nAPLA revenues increased 17% on a currency-neutral basis due to higher revenues across nearly all territories, led by Southeast Asia and India, Korea and Japan. The increase was partially offset by a decline in our CASA territory. 
Within our CASA territory, the transition of our Chile, Argentina and Uruguay entities to a third- party distributor operating model reduced APLA revenue growth by approximately 5 percentage points. Revenues increased primarily due to growth in Men's, Women's and the Jordan Brand. NIKE Direct revenues increased 22%, driven by digital sales growth of 23% and comparable store sales growth of 28%.\\n\\nFootwear revenues increased 19% on a currency-neutral basis, primarily due to higher revenues in Men's, Women's and the Jordan Brand. Unit sales of footwear increased 16%, while higher ASP per pair contributed approximately 3 percentage points of footwear revenue growth. Higher ASP per pair was primarily due to higher full-price ASP and growth in NIKE Direct, partially offset by lower NIKE Direct ASP.\\n\\nApparel revenues increased 13% on a currency-neutral basis, primarily due to higher revenues in Men's. Unit sales of apparel increased 9%, while higher ASP per\"),\n", - " 0.296876847744),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='TOTAL NIKE BRAND Converse\\n\\n$\\n\\n1,932 (4,841)\\n\\n8,359 676\\n\\n$\\n\\n1,896 (4,262)\\n\\n8,406 669\\n\\n2 % -14 %\\n\\n1 % $ 1 %\\n\\n1,530 (3,656)\\n\\n8,641 543\\n\\nCorporate TOTAL NIKE, INC. EARNINGS BEFORE INTEREST AND TAXES\\n\\n(1)\\n\\n$\\n\\n(2,840)\\n\\n6,195\\n\\n$\\n\\n(2,219)\\n\\n6,856\\n\\n28 %\\n\\n10 % $\\n\\n(2,261)\\n\\n6,923\\n\\nEBIT margin\\n\\n(1)\\n\\n12.1 %\\n\\n14.7 %\\n\\n15.5 %\\n\\nInterest expense (income), net\\n\\n(6)\\n\\n205\\n\\n—\\n\\n262\\n\\nTOTAL NIKE, INC. INCOME BEFORE INCOME TAXES\\n\\n$\\n\\n6,201\\n\\n$\\n\\n6,651\\n\\n7 % $\\n\\n6,661\\n\\n(1) Total NIKE Brand EBIT, Total NIKE, Inc. EBIT and EBIT Margin represent non-GAAP financial measures. 
See \"Use of Non-GAAP Financial Measures\" for further information.\\n\\n2023 FORM 10-K 36\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\n7 % 12 % -13 %\\n\\n16 % 302 %\\n\\n6 % 7 %\\n\\n— 6 %\\n\\n% CHANGE\\n\\n0 % 35 % -27 %\\n\\n24 % -17 %\\n\\n3 % 23 % 2 %\\n\\n1 %\\n\\n—\\n\\n0 %\\n\\nTable of Contents\\n\\nNORTH AMERICA\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY\\n\\nCHANGES FISCAL 2021\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY CHANGES\\n\\nRevenues by: Footwear Apparel\\n\\n$\\n\\n14,897 $ 5,947\\n\\n12,228 5,492\\n\\n22 % 8 %\\n\\n22 % $ 9 %\\n\\n11,644 5,028\\n\\n5 % 9 %\\n\\n5 % 9 %\\n\\nEquipment\\n\\nTOTAL REVENUES\\n\\n$\\n\\n764 21,608 $\\n\\n633 18,353\\n\\n21 % 18 %\\n\\n21 % 18 % $\\n\\n507 17,179\\n\\n25 % 7 %\\n\\n25 % 7 %\\n\\nRevenues by:\\n\\nSales to Wholesale Customers\\n\\n$\\n\\n11,273 $\\n\\n9,621\\n\\n17 %\\n\\n18 % $\\n\\n10,186\\n\\n6 %\\n\\n6 %\\n\\nSales through NIKE Direct\\n\\nTOTAL REVENUES\\n\\n$\\n\\n10,335 21,608 $\\n\\n8,732 18,353\\n\\n18 % 18 %\\n\\n18 % 18 % $\\n\\n6,993 17,179\\n\\n25 % 7 %\\n\\n25 % 7 %\\n\\nEARNINGS BEFORE INTEREST AND TAXES\\n\\n$\\n\\n5,454 $\\n\\n5,114\\n\\n7 %\\n\\n$\\n\\n5,089\\n\\n0 %\\n\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n\\nNorth America revenues increased 18% on a currency-neutral basis, primarily due to higher revenues in Men\\'s and the Jordan Brand. NIKE Direct revenues\\n\\nincreased 18%, driven by strong digital sales growth of 23%, comparable store sales growth of 9% and the addition of new stores.\\n\\nFootwear revenues increased 22% on a currency-neutral basis, primarily due to higher revenues in Men\\'s and the Jordan Brand. Unit sales of footwear increased\\n\\n17%, while higher ASP per pair contributed approximately 5 percentage points of footwear revenue growth. 
Higher ASP per pair was primarily due to higher full-price ASP and growth in NIKE Direct, partially offset by lower NIKE Direct ASP, reflecting higher promotional activity as well as lower available inventory supply in the prior period and a lower mix of full-price sales.'),\n", - " 0.301767408848)]" + "[(Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. 
For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", + " 0.233286499977),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. 
The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). 
As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", + " 0.233286499977),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. 
For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", + " 0.233286499977),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Tax (expense) benefit Gain (loss) net of tax\\n\\n5 (14)\\n\\n(9) 22\\n\\nTotal net gain (loss) reclassified for the period\\n\\n$\\n\\n463 $\\n\\n30\\n\\n2023 FORM 10-K 82\\n\\nTable of Contents\\n\\nNOTE 14 — REVENUES\\n\\nDISAGGREGATION OF REVENUES The following tables present the Company's Revenues disaggregated by reportable operating segment, major product line and distribution channel:\\n\\n(Dollars in 
millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nYEAR ENDED MAY 31, 2023 ASIA PACIFIC & LATIN (1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nAMERICA\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear\\n\\n$\\n\\n14,897 $\\n\\n8,260 $\\n\\n5,435 $\\n\\n4,543 $\\n\\n— $\\n\\n33,135 $\\n\\n2,155 $\\n\\n— $\\n\\n35,290\\n\\nApparel Equipment Other\\n\\n5,947 764 —\\n\\n4,566 592 —\\n\\n1,666 147 —\\n\\n1,664 224 —\\n\\n— — 58\\n\\n13,843 1,727 58\\n\\n90 28 154\\n\\n— — 27\\n\\n13,933 1,755 239\\n\\nTOTAL REVENUES\\n\\n$\\n\\n21,608 $\\n\\n13,418 $\\n\\n7,248 $\\n\\n6,431 $\\n\\n58 $\\n\\n48,763 $\\n\\n2,427 $\\n\\n27 $\\n\\n51,217\\n\\nRevenues by:\\n\\nSales to Wholesale Customers Sales through Direct to Consumer\\n\\n$\\n\\n11,273 $ 10,335\\n\\n8,522 $ 4,896\\n\\n3,866 $ 3,382\\n\\n3,736 $ 2,695\\n\\n— $ —\\n\\n27,397 $ 21,308\\n\\n1,299 $ 974\\n\\n— $ —\\n\\n28,696 22,282\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n21,608 $\\n\\n—\\n\\n13,418 $\\n\\n— 7,248 $\\n\\n— 6,431 $\\n\\n58 58 $\\n\\n58\\n\\n48,763 $\\n\\n154 2,427 $\\n\\n27 27 $\\n\\n239 51,217\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand businesses in its CASA territory to third-party distributors.\\n\\nYEAR ENDED MAY 31, 2022\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nASIA PACIFIC & LATIN AMERICA\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear Apparel\\n\\n$\\n\\n12,228 $ 5,492\\n\\n7,388 $ 4,527\\n\\n5,416 $ 1,938\\n\\n4,111 $ 1,610\\n\\n— $ —\\n\\n29,143 $ 13,567\\n\\n2,094 $ 103\\n\\n— $ —\\n\\n31,237 13,670\\n\\nEquipment Other\\n\\n633 —\\n\\n564 —\\n\\n193 —\\n\\n234 —\\n\\n— 102\\n\\n1,624 102\\n\\n26 123\\n\\n— (72)\\n\\n1,650 153\\n\\nTOTAL REVENUES Revenues by:\\n\\n$\\n\\n18,353 $\\n\\n12,479 
$\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\nSales to Wholesale Customers Sales through Direct to Consumer Other\\n\\n$\\n\\n9,621 $ 8,732 —\\n\\n8,377 $ 4,102 —\\n\\n4,081 $ 3,466 —\\n\\n3,529 $ 2,426 —\\n\\n— $ — 102\\n\\n25,608 $ 18,726 102\\n\\n1,292 $ 931 123\\n\\n— $ — (72)\\n\\n26,900 19,657 153\\n\\nTOTAL REVENUES\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\n2023 FORM 10-K 83\\n\\nTable of Contents\\n\\nYEAR ENDED MAY 31, 2021\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\"),\n", + " 0.261225402355)]" ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -615,7 +672,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -628,16 +685,16 @@ "data": { "text/plain": [ "[(Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. 
The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). 
As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", - " 0.233286261559),\n", + " 0.233286499977),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. 
For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", + " 0.233286499977),\n", + " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. 
The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). 
As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", + " 0.233286499977),\n", " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Tax (expense) benefit Gain (loss) net of tax\\n\\n5 (14)\\n\\n(9) 22\\n\\nTotal net gain (loss) reclassified for the period\\n\\n$\\n\\n463 $\\n\\n30\\n\\n2023 FORM 10-K 82\\n\\nTable of Contents\\n\\nNOTE 14 — REVENUES\\n\\nDISAGGREGATION OF REVENUES The following tables present the Company's Revenues disaggregated by reportable operating segment, major product line and distribution channel:\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nYEAR ENDED MAY 31, 2023 ASIA PACIFIC & LATIN (1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nAMERICA\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear\\n\\n$\\n\\n14,897 $\\n\\n8,260 $\\n\\n5,435 $\\n\\n4,543 $\\n\\n— $\\n\\n33,135 $\\n\\n2,155 $\\n\\n— $\\n\\n35,290\\n\\nApparel Equipment Other\\n\\n5,947 764 —\\n\\n4,566 592 —\\n\\n1,666 147 —\\n\\n1,664 224 —\\n\\n— — 58\\n\\n13,843 1,727 58\\n\\n90 28 154\\n\\n— — 27\\n\\n13,933 1,755 239\\n\\nTOTAL REVENUES\\n\\n$\\n\\n21,608 $\\n\\n13,418 $\\n\\n7,248 $\\n\\n6,431 $\\n\\n58 $\\n\\n48,763 $\\n\\n2,427 $\\n\\n27 $\\n\\n51,217\\n\\nRevenues by:\\n\\nSales to Wholesale Customers Sales through Direct to Consumer\\n\\n$\\n\\n11,273 $ 10,335\\n\\n8,522 $ 4,896\\n\\n3,866 $ 3,382\\n\\n3,736 $ 2,695\\n\\n— $ —\\n\\n27,397 $ 21,308\\n\\n1,299 $ 
974\\n\\n— $ —\\n\\n28,696 22,282\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n21,608 $\\n\\n—\\n\\n13,418 $\\n\\n— 7,248 $\\n\\n— 6,431 $\\n\\n58 58 $\\n\\n58\\n\\n48,763 $\\n\\n154 2,427 $\\n\\n27 27 $\\n\\n239 51,217\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand businesses in its CASA territory to third-party distributors.\\n\\nYEAR ENDED MAY 31, 2022\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nASIA PACIFIC & LATIN AMERICA\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear Apparel\\n\\n$\\n\\n12,228 $ 5,492\\n\\n7,388 $ 4,527\\n\\n5,416 $ 1,938\\n\\n4,111 $ 1,610\\n\\n— $ —\\n\\n29,143 $ 13,567\\n\\n2,094 $ 103\\n\\n— $ —\\n\\n31,237 13,670\\n\\nEquipment Other\\n\\n633 —\\n\\n564 —\\n\\n193 —\\n\\n234 —\\n\\n— 102\\n\\n1,624 102\\n\\n26 123\\n\\n— (72)\\n\\n1,650 153\\n\\nTOTAL REVENUES Revenues by:\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\nSales to Wholesale Customers Sales through Direct to Consumer Other\\n\\n$\\n\\n9,621 $ 8,732 —\\n\\n8,377 $ 4,102 —\\n\\n4,081 $ 3,466 —\\n\\n3,529 $ 2,426 —\\n\\n— $ — 102\\n\\n25,608 $ 18,726 102\\n\\n1,292 $ 931 123\\n\\n— $ — (72)\\n\\n26,900 19,657 153\\n\\nTOTAL REVENUES\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\n2023 FORM 10-K 83\\n\\nTable of Contents\\n\\nYEAR ENDED MAY 31, 2021\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\"),\n", - " 0.261225521564),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='4,780 (508)\\n\\n7 % -80 %\\n\\nTOTAL NIKE BRAND WHOLESALE EQUIVALENT REVENUES\\n\\n$\\n\\n40,127 $\\n\\n36,151\\n\\n11 %\\n\\n18 % 
$\\n\\n35,770\\n\\n1 %\\n\\n(1)\\n\\nThe percent change excluding currency changes and the presentation of wholesale equivalent revenues represent non-GAAP financial measures. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(3) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\n(4)\\n\\nAs a result of the Consumer Direct Acceleration strategy, announced in fiscal 2021, the Company is now organized around a consumer construct of Men\\'s, Women\\'s and Kids\\'. Beginning in the first quarter of fiscal 2022, unisex products are classified within Men\\'s, and Jordan Brand revenues are separately reported. Certain prior year amounts were reclassified to conform to fiscal 2022 presentation. These changes had no impact on previously reported consolidated results of operations or shareholders\\' equity.\\n\\n(5) Others include products not allocated to Men\\'s, Women\\'s, NIKE Kids\\' and Jordan Brand, as well as certain adjustments that are not allocated to products designated by consumer.\\n\\n2023 FORM 10-K 32\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES\\n\\n4 % 6 %\\n\\n18 % 302 % 6 %\\n\\n7 % —\\n\\n6 %\\n\\n1 %\\n\\n15 % 302 %\\n\\n6 %\\n\\n1 % 7 % 1 %\\n\\n3 % 1 % 0 %\\n\\n7 % -79 %\\n\\n1 %\\n\\nTable of Contents\\n\\nFISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS The following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:\\n\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n\\nNIKE, Inc. 
Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively. The increase was due to higher revenues in North America, Europe, Middle East & Africa (\"EMEA\"), APLA and Greater China, which contributed approximately 7, 6, 2 and 1 percentage points to NIKE, Inc. Revenues, respectively.'),\n", - " 0.28352534771),\n", - " (Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"ASIA PACIFIC & LATIN AMERICA\\n\\n(1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE BRAND\\n\\nCONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by:\\n\\nFootwear Apparel Equipment\\n\\n$\\n\\n11,644 $ 5,028 507\\n\\n6,970 $ 3,996 490\\n\\n5,748 $ 2,347 195\\n\\n3,659 $ 1,494 190\\n\\n— $ — —\\n\\n28,021 $ 12,865 1,382\\n\\n1,986 $ 104 29\\n\\n— $ — —\\n\\n30,007 12,969 1,411\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n17,179 $\\n\\n—\\n\\n11,456 $\\n\\n— 8,290 $\\n\\n— 5,343 $\\n\\n25 25 $\\n\\n25\\n\\n42,293 $\\n\\n86 2,205 $\\n\\n40 40 $\\n\\n151 44,538\\n\\nRevenues by:\\n\\nSales to Wholesale Customers $\\n\\n10,186 $\\n\\n7,812 $\\n\\n4,513 $\\n\\n3,387 $\\n\\n— $\\n\\n25,898 $\\n\\n1,353 $\\n\\n— $\\n\\n27,251\\n\\nSales through Direct to Consumer Other\\n\\n6,993 —\\n\\n3,644 —\\n\\n3,777 —\\n\\n1,956 —\\n\\n— 25\\n\\n16,370 25\\n\\n766 86\\n\\n— 40\\n\\n17,136 151\\n\\nTOTAL REVENUES\\n\\n$\\n\\n17,179 $\\n\\n11,456 $\\n\\n8,290 $\\n\\n5,343 $\\n\\n25 $\\n\\n42,293 $\\n\\n2,205 $\\n\\n40 $\\n\\n44,538\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand business in Brazil to a third-party distributor.\\n\\nFor the fiscal years ended May 31, 2023, 2022 and 2021, Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment. Converse Other revenues were primarily attributable to licensing businesses. 
Corporate revenues primarily consisted of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse but managed through the Company's central foreign exchange risk management program.\\n\\nAs of May 31, 2023 and 2022, the Company did not have any contract assets and had an immaterial amount of contract liabilities recorded in Accrued liabilities on the Consolidated Balance Sheets.\\n\\nSALES-RELATED RESERVES\\n\\nAs of May 31, 2023 and 2022, the Company's sales-related reserve balance, which includes returns, post-invoice sales discounts and miscellaneous claims, was $994 million and $1,015 million, respectively, recorded in Accrued liabilities on the Consolidated Balance Sheets. The estimated cost of inventory for expected product returns was $226 million and $194 million as of May 31, 2023 and 2022, respectively, and was recorded in Prepaid expenses and other current assets on the Consolidated Balance Sheets.\\n\\nNOTE 15 — OPERATING SEGMENTS AND RELATED INFORMATION\"),\n", - " 0.285882711411)]" + " 0.261225402355)]" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -671,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -698,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -738,7 +795,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -765,7 +822,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -778,10 +835,10 @@ { "data": { "text/plain": [ - "\"Nike's revenue for the fiscal year ended May 31, 2023, was $51,217 million, while the revenue for the fiscal year 
ended May 31, 2022, was $46,710 million. This represents an increase in revenue from the previous year.\"" + "\"Nike's revenue last year was $44,538 million, and this year it was $51,217 million.\"" ] }, - "execution_count": 20, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -793,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -806,10 +863,10 @@ { "data": { "text/plain": [ - "'Nike offers three main types of products: footwear, apparel, and equipment. Nike is part of the athletic footwear, apparel, and equipment industry.'" + "'The exact number of products Nike offers is not explicitly stated in the provided context. However, Nike is part of the athletic footwear, apparel, and equipment industry, which is highly competitive both in the United States and worldwide.'" ] }, - "execution_count": 21, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -821,7 +878,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -834,10 +891,10 @@ { "data": { "text/plain": [ - "\"I don't have access to real-time information or subjective assessments, so I cannot provide a definitive answer on whether Nike is considered an ethical company. It is recommended to research and analyze various sources, including corporate social responsibility reports and news articles, to form your own opinion on the ethical practices of Nike.\"" + "'Based on the provided information, there is no specific mention or data that directly addresses the ethical practices of Nike as a company. 
Therefore, it is not possible to determine if Nike is an ethical company based on the provided context.'" ] }, - "execution_count": 22, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -860,11 +917,26 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "metadata": { "id": "DtZi-mQ61vm-" }, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "REDIS_URL env var not set", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mredisvl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindex\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SearchIndex\n\u001b[0;32m----> 3\u001b[0m idx \u001b[38;5;241m=\u001b[39m \u001b[43mSearchIndex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_existing\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mredis_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mREDIS_URL\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m idx\u001b[38;5;241m.\u001b[39mdelete()\n", + "File \u001b[0;32m~/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/index/index.py:322\u001b[0m, in \u001b[0;36mSearchIndex.from_existing\u001b[0;34m(cls, name, redis_client, redis_url, **kwargs)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m redis_url:\n\u001b[0;32m--> 322\u001b[0m redis_client \u001b[38;5;241m=\u001b[39m 
\u001b[43mRedisConnectionFactory\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_redis_connection\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 323\u001b[0m \u001b[43m \u001b[49m\u001b[43mredis_url\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mredis_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 324\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequired_modules\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mREQUIRED_MODULES_FOR_INTROSPECTION\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 325\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 327\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m redis_client:\n\u001b[1;32m 328\u001b[0m RedisConnectionFactory\u001b[38;5;241m.\u001b[39mvalidate_sync_redis(\n\u001b[1;32m 329\u001b[0m redis_client, required_modules\u001b[38;5;241m=\u001b[39mREQUIRED_MODULES_FOR_INTROSPECTION\n\u001b[1;32m 330\u001b[0m )\n", + "File \u001b[0;32m~/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/redis/connection.py:248\u001b[0m, in \u001b[0;36mRedisConnectionFactory.get_redis_connection\u001b[0;34m(url, required_modules, **kwargs)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_redis_connection\u001b[39m(\n\u001b[1;32m 226\u001b[0m url: Optional[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 227\u001b[0m required_modules: Optional[List[Dict[\u001b[38;5;28mstr\u001b[39m, Any]]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 228\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 229\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Redis:\n\u001b[1;32m 230\u001b[0m \u001b[38;5;250m 
\u001b[39m\u001b[38;5;124;03m\"\"\"Creates and returns a synchronous Redis client.\u001b[39;00m\n\u001b[1;32m 231\u001b[0m \n\u001b[1;32m 232\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[38;5;124;03m RedisModuleVersionError: If required Redis modules are not installed.\u001b[39;00m\n\u001b[1;32m 247\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 248\u001b[0m url \u001b[38;5;241m=\u001b[39m url \u001b[38;5;129;01mor\u001b[39;00m \u001b[43mget_address_from_env\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 249\u001b[0m client \u001b[38;5;241m=\u001b[39m Redis\u001b[38;5;241m.\u001b[39mfrom_url(url, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 251\u001b[0m RedisConnectionFactory\u001b[38;5;241m.\u001b[39mvalidate_sync_redis(\n\u001b[1;32m 252\u001b[0m client, required_modules\u001b[38;5;241m=\u001b[39mrequired_modules\n\u001b[1;32m 253\u001b[0m )\n", + "File \u001b[0;32m~/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/redis/connection.py:61\u001b[0m, in \u001b[0;36mget_address_from_env\u001b[0;34m()\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Get a redis connection from environment variables.\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \n\u001b[1;32m 57\u001b[0m \u001b[38;5;124;03mReturns:\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;124;03m str: Redis URL\u001b[39;00m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mREDIS_URL\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m os\u001b[38;5;241m.\u001b[39menviron:\n\u001b[0;32m---> 61\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mREDIS_URL env var not 
set\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m os\u001b[38;5;241m.\u001b[39menviron[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mREDIS_URL\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", + "\u001b[0;31mValueError\u001b[0m: REDIS_URL env var not set" + ] + } + ], "source": [ "from redisvl.index import SearchIndex\n", "\n", @@ -875,6 +947,77 @@ "\n", "idx.delete()" ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.4.0'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import redisvl\n", + "\n", + "redisvl.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'5.2.1'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import redis\n", + "\n", + "redis.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'redis://:@localhost:6379'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "REDIS_URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/python-recipes/RAG/03_llamaindex.ipynb b/python-recipes/RAG/03_llamaindex.ipynb index 068cabff..1e881cb9 100644 --- a/python-recipes/RAG/03_llamaindex.ipynb +++ b/python-recipes/RAG/03_llamaindex.ipynb @@ -60,19 +60,11 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ - "%pip install -q llama-index 
llama-index-vector-stores-redis llama-index-embeddings-cohere llama-index-embeddings-openai" + "%pip install -q llama-index \"llama-index-vector-stores-redis>=0.4.0\" llama-index-embeddings-cohere llama-index-embeddings-openai" ] }, { @@ -132,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -171,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -184,13 +176,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sample doc Doc ID: c013353e-dae7-4d17-befd-9e784c8acf79\n", - "Text: UNITED STATES SECURITIES AND EXCHANGE COMMISSION Washington,\n", - "D.C. 20549 FORM 10-K (Mark One) ☒ ANNUAL REPORT PURSUANT T O SECTION\n", - "13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the fiscal year\n", - "ended September 24, 2022 or ☐ TRANSITION REPORT PURSUANT T O SECTION\n", - "13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934 For the transition\n", - "period...\n" + "Sample doc Doc ID: b90e8ae9-7204-4e86-87ff-16cc68f9fff4\n", + "Text: 2022 COLORADO\n" ] } ], @@ -209,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -229,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -251,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -269,30 +256,30 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Node ID: d2e6cd9c-0716-49d8-8563-407a00d05445\n", - "Text: Table of Contents FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS The\n", + "Node ID: 023a5d47-4560-4591-ab20-37e4522863aa\n", + "Text: Table of Contents FISCAL 2023 NIKE BRAND REVENUE 
HIGHLIGHTSThe\n", "following tables present NIKE Brand revenues disaggregated by\n", "reportable operating segment, distribution channel and major product\n", - "line: FISCAL 2023 COMPARED TO FISCAL 2022 •NIKE, Inc. Revenues were\n", + "line: FISCAL 2023 COMPARED TO FISCAL 2022 • NIKE, Inc. Revenues were\n", "$51.2 billion in fiscal 2023, which increased 10% and 16% compared to\n", "fiscal 2022 on...\n", - "Score: 0.900\n", + "Score: 0.899\n", "\n", - "Node ID: 28542d3b-b345-4e9e-b675-f62361ec85d9\n", - "Text: Table of Contents NORTH AMERICA (Dollars in millions) FISCAL\n", - "2023FISCAL 2022 % CHANGE% CHANGE EXCLUDING CURRENCY CHANGESFISCAL 2021\n", - "% CHANGE% CHANGE EXCLUDING CURRENCY CHANGES Revenues by: Footwear $\n", - "14,897 $ 12,228 22 % 22 %$ 11,644 5 % 5 % Apparel 5,947 5,492 8 % 9 %\n", - "5,028 9 % 9 % Equipment 764 633 21 % 21 % 507 25 % 25 % TOTAL REVENUES\n", - "$ 21,6...\n", - "Score: 0.885\n", + "Node ID: 10b3b6b1-112c-4279-a75a-d4d866c07f6b\n", + "Text: Sales through NIKE Direct Global Brand Divisions in FISCAL 2023\n", + "amounted to $21,308 million. Total NIKE Brand Wholesale Equivalent\n", + "Revenues for FISCAL 2023 were $48,763 million, with a 10% rise from\n", + "FISCAL 2022. NIKE Brand Wholesale Equivalent Revenues included sales\n", + "from Men's, Women's, and NIKE Kids' categories. 
Jordan Brand revenues\n", + "increased...\n", + "Score: 0.883\n", "\n" ] } @@ -313,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -322,7 +309,7 @@ "\"NIKE's revenue in fiscal 23 was $51.2 billion.\"" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -347,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -388,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -397,7 +384,7 @@ "IndexInfo(name='custom_index', prefix='docs', key_separator=':', storage_type=)" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -408,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -422,7 +409,7 @@ " 'vector': HNSWVectorField(name='vector', type='vector', path=None, attrs=HNSWVectorFieldAttributes(dims=1536, algorithm=, datatype=, distance_metric=, initial_cap=None, m=16, ef_construction=200, ef_runtime=10, epsilon=0.01))}" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -433,28 +420,7 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# from datetime import datetime\n", - "\n", - "\n", - "# def date_to_timestamp(date_string: str) -> int:\n", - "# date_format: str = \"%Y-%m-%d\"\n", - "# return int(datetime.strptime(date_string, date_format).timestamp())\n", - "\n", - "\n", - "# # iterate through documents and add new field\n", - "# for document in docs:\n", - "# document.metadata[\"updated_at\"] = date_to_timestamp(\n", - "# document.metadata[\"last_modified_date\"]\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 12, + "execution_count": 14, 
"metadata": {}, "outputs": [], "source": [ @@ -482,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -510,23 +476,23 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Node ID: cd0c5d8f-e3b1-4cbb-aa6a-5960003cdb2d\n", + "Node ID: 013f339e-7fda-4fc7-baf0-afbb3dadf47d\n", "Text: Table of Contents valuation. In the ordinary course of our\n", "business, there are many transactions and calculations for which the\n", "ultimate tax determination is uncertain. Significant judgment is\n", "required in evaluating and estimating our tax expense, assets, and\n", "liabilities. We are also subject to tax controversies in various\n", "jurisdictions that can...\n", - "Score: 0.746\n", + "Score: 0.747\n", "\n", - "Node ID: 6745f668-4c7a-43bf-a9c3-9b04e1a497f8\n", + "Node ID: ac3f2b03-0520-4a50-ba3e-a97ad0a6f643\n", "Text: Table of Contents Included in other income (expense), net in\n", "2021 and 2022 is a marketable equity securities valuation gain (loss)\n", "of $11.8 billion and $(12.7) billion from our equity investment in\n", @@ -535,7 +501,7 @@ "observable changes in ...\n", "Score: 0.740\n", "\n", - "Node ID: 717666fe-fea5-488b-999c-84e6d8b9a0db\n", + "Node ID: 62ef1673-dcfe-4ba0-a437-7b142cda4114\n", "Text: Exhibit 31.1 CERTIFICATIONS I, Andrew R. Jassy, certify that: 1.\n", "I have reviewed this Form 10-K of Amazon.com, Inc.; 2. 
Based on my\n", "knowledge, this report does not contain any untrue statement of a\n", diff --git a/python-recipes/RAG/04_advanced_redisvl.ipynb b/python-recipes/RAG/04_advanced_redisvl.ipynb index 227d95c0..96141b2a 100644 --- a/python-recipes/RAG/04_advanced_redisvl.ipynb +++ b/python-recipes/RAG/04_advanced_redisvl.ipynb @@ -92,7 +92,7 @@ } ], "source": [ - "%pip install -q redis redisvl pandas \"unstructured[pdf]\" sentence-transformers langchain langchain-community \"openai>=1.57.0\" tqdm" + "%pip install -q \"redisvl>=0.4.1\" pandas \"unstructured[pdf]\" sentence-transformers langchain langchain-community \"openai>=1.57.0\" tqdm" ] }, { @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -199,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "id": "uijl2qFH1ui3" }, @@ -208,7 +208,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Done preprocessing. Created 210 chunks of the original pdf resources/nke-10k-2023.pdf\n" + "Done preprocessing. Created 211 chunks of the original pdf resources/nke-10k-2023.pdf\n" ] } ], @@ -231,16 +231,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Document(metadata={'source': 'resources/nke-10k-2023.pdf', 'page': 0, 'page_label': '1'}, page_content=\"Table of Contents\\nUNITED STATES\\nSECURITIES AND EXCHANGE COMMISSION\\nWashington, D.C. 20549\\nFORM 10-K\\n(Mark One)\\n☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\nFOR THE FISCAL YEAR ENDED MAY 31, 2023\\nOR\\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\nFOR THE TRANSITION PERIOD FROM TO .\\nCommission File No. 
1-10635\\nNIKE, Inc.\\n(Exact name of Registrant as specified in its charter)\\nOregon 93-0584541\\n(State or other jurisdiction of incorporation) (IRS Employer Identification No.)\\nOne Bowerman Drive, Beaverton, Oregon 97005-6453\\n(Address of principal executive offices and zip code)\\n(503) 671-6453\\n(Registrant's telephone number, including area code)\\nSECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:\\nClass B Common Stock NKE New York Stock Exchange\\n(Title of each class) (Trading symbol) (Name of each exchange on which registered)\\nSECURITIES REGISTERED PURSUANT TO SECTION 12(G) OF THE ACT:\\nNONE\\nIndicate by check mark: YES NO\\n•if the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act. þ ¨\\n•if the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Act. ¨ þ\\n•whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding\\n12 months (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the\\npast 90 days.þ ¨\\n•whether the registrant has submitted electronically every Interactive Data File required to be submitted pursuant to Rule 405 of Regulation S-T\\n(§232.405 of this chapter) during the preceding 12 months (or for such shorter period that the registrant was required to submit such files).þ ¨\\n•whether the registrant is a large accelerated filer, an accelerated filer, a non-accelerated filer, a smaller reporting company or an emerging growth company. 
See the definitions of “large accelerated filer,”\\n“accelerated filer,” “smaller reporting company,” and “emerging growth company” in Rule 12b-2 of the Exchange Act.\\nLarge accelerated filer þ Accelerated filer ☐ Non-accelerated filer ☐ Smaller reporting company ☐ Emerging growth company ☐\\n•if an emerging growth company, if the registrant has elected not to use the extended transition period for complying with any new or revised financial\")" + "Document(metadata={'source': 'resources/nke-10k-2023.pdf', 'page': 0, 'page_label': '1'}, page_content=\"Table of Contents\\nUNITED STATES\\nSECURITIES AND EXCHANGE COMMISSION\\nWashington, D.C. 20549\\nFORM 10-K\\n(Mark One)\\n☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\nFOR THE FISCAL YEAR ENDED MAY 31, 2023\\nOR\\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934\\nFOR THE TRANSITION PERIOD FROM TO .\\nCommission File No. 1-10635\\nNIKE, Inc.\\n(Exact name of Registrant as specified in its charter)\\nOregon 93-0584541\\n(State or other jurisdiction of incorporation) (IRS Employer Identification No.)\\nOne Bowerman Drive, Beaverton, Oregon 97005-6453\\n(Address of principal executive offices and zip code)\\n(503) 671-6453\\n(Registrant's telephone number, including area code)\\nSECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:\\nClass B Common Stock NKE New York Stock Exchange\\n(Title of each class) (Trading symbol) (Name of each exchange on which registered)\\nSECURITIES REGISTERED PURSUANT TO SECTION 12(G) OF THE ACT:\\nNONE\\nIndicate by check mark: YES NO\\n• if the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act. þ ¨ \\n• if the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Act. 
¨ þ \\n• whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding\\n12 months (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the\\npast 90 days.\\nþ ¨ \\n• whether the registrant has submitted electronically every Interactive Data File required to be submitted pursuant to Rule 405 of Regulation S-T\\n(§232.405 of this chapter) during the preceding 12 months (or for such shorter period that the registrant was required to submit such files).\\nþ ¨ \\n• whether the registrant is a large accelerated filer, an accelerated filer, a non-accelerated filer, a smaller reporting company or an emerging growth company. See the definitions of “large accelerated filer,”\\n“accelerated filer,” “smaller reporting company,” and “emerging growth company” in Rule 12b-2 of the Exchange Act.\\nLarge accelerated filer þ Accelerated filer ☐ Non-accelerated filer ☐ Smaller reporting company ☐ Emerging growth company ☐\")" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -267,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -284,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -341,227 +341,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/210 [00:00\n", " 0\n", " What is the trend in the company's revenue and...\n", - " Based on the provided financial information fr...\n", + " The company experienced revenue growth in fisc...\n", " \n", " \n", " 1\n", @@ -1119,7 +752,7 @@ " \n", " 2\n", " How much debt does the company have, and what ...\n", - " The 
company has long-term debt of $8,927 milli...\n", + " As of May 31, 2023, the company had Long-term ...\n", " \n", " \n", " 3\n", @@ -1129,7 +762,7 @@ " \n", " 4\n", " What is the company's strategy for growth?\n", - " NIKE's strategy for growth focuses on long-ter...\n", + " The company's strategy for growth includes ide...\n", " \n", " \n", "\n", @@ -1144,14 +777,14 @@ "4 What is the company's strategy for growth? \n", "\n", " answer \n", - "0 Based on the provided financial information fr... \n", + "0 The company experienced revenue growth in fisc... \n", "1 The company's primary revenue sources are from... \n", - "2 The company has long-term debt of $8,927 milli... \n", + "2 As of May 31, 2023, the company had Long-term ... \n", "3 The company acknowledges the importance of env... \n", - "4 NIKE's strategy for growth focuses on long-ter... " + "4 The company's strategy for growth includes ide... " ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1182,38 +815,23 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2f92142ba61a4b5492677b34265e5487", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Batches: 0%| | 0/1 [00:00=0.3.0" + "%pip install -q \"redisvl>=0.4.1\"" ] }, { @@ -608,7 +608,8 @@ "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.11.9" } }, "nbformat": 4, diff --git a/python-recipes/RAG/06_ragas_evaluation.ipynb b/python-recipes/RAG/06_ragas_evaluation.ipynb index 7600b083..c3b112e8 100644 --- a/python-recipes/RAG/06_ragas_evaluation.ipynb +++ b/python-recipes/RAG/06_ragas_evaluation.ipynb @@ -1,1229 +1,1229 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", - 
"# Evaluating RAG\n", - "\n", - "This notebook uses the [ragas library](https://docs.ragas.io/en/stable/getstarted/index.html) and [Redis](https://redis.com) to evaluate the performance of sample RAG application. Also see the original [source paper](https://arxiv.org/pdf/2309.15217) to build a more detailed understanding.\n", - "\n", - "## Let's Begin!\n", - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To start, we need a RAG app to evaluate. Let's create one using LangChain and connect it with Redis as the vector DB.\n", - "\n", - "## Init redis, data prep, and populating the vector DB" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "%pip install -q redis \"unstructured[pdf]\" sentence-transformers langchain langchain-redis langchain-huggingface langchain-openai ragas datasets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Running Redis in Colab\n", - "Use the shell script below to download, extract, and install [Redis Stack](https://redis.io/docs/getting-started/install-stack/) directly from the Redis package archive." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# NBVAL_SKIP\n", - "%%sh\n", - "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", - "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", - "sudo apt-get update > /dev/null 2>&1\n", - "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", - "redis-stack-server --daemonize yes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### For Alternative Environments\n", - "There are many ways to get the necessary redis-stack instance running\n", - "1. On cloud, deploy a [FREE instance of Redis in the cloud](https://redis.com/try-free/). Or, if you have your\n", - "own version of Redis Enterprise running, that works too!\n", - "2. Per OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n", - "3. 
With docker: `docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest`" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "\n", - "# Replace values below with your own if using Redis Cloud instance\n", - "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") # ex: \"redis-18374.c253.us-central1-1.gce.cloud.redislabs.com\"\n", - "REDIS_PORT = os.getenv(\"REDIS_PORT\", \"6379\") # ex: 18374\n", - "REDIS_PASSWORD = os.getenv(\"REDIS_PASSWORD\", \"\") # ex: \"1TNxTEdYRDgIDKM2gDfasupCADXXXX\"\n", - "\n", - "# If SSL is enabled on the endpoint, use rediss:// as the URL prefix\n", - "REDIS_URL = f\"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from langchain_community.document_loaders import PyPDFLoader\n", - "\n", - "CHUNK_SIZE = 2500\n", - "CHUNK_OVERLAP = 0\n", - "\n", - "# pdf to load\n", - "path = 'resources/nke-10k-2023.pdf'\n", - "assert os.path.exists(path), f\"File not found: {path}\"\n", - "\n", - "# load and split\n", - "loader = PyPDFLoader(path)\n", - "pages = loader.load()\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)\n", - "chunks = text_splitter.split_documents(pages)\n", - "\n", - "print(\"Done preprocessing. Created\", len(chunks), \"chunks of the original pdf\", path)" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Table of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 
20549FORM 10-K(Mark One)☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934FOR THE FISCAL YEAR ENDED MAY 31, 2023OR☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934FOR THE TRANSITION PERIOD FROM TO .Commission File No. 1-10635\\n\\nAs of November 30, 2022, the aggregate market values of the Registrant's Common Stock held by non-affiliates were:Class A$7,831,564,572 Class B136,467,702,472 $144,299,267,044\\n\\nNIKE, Inc.(Exact name of Registrant as specified in its charter)Oregon93-0584541(State or other jurisdiction of incorporation)(IRS Employer Identification No.)One Bowerman Drive, Beaverton, Oregon 97005-6453(Address of principal executive offices and zip code)(503) 671-6453(Registrant's telephone number, including area code)SECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:Class B Common StockNKENew York Stock Exchange(Title of each class)(Trading symbol)(Name of each exchange on which registered)SECURITIES REGISTERED PURSUANT TO SECTION 12(G) OF THE ACT:NONE\")" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chunks[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_huggingface import HuggingFaceEmbeddings\n", - "\n", - "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_redis import RedisVectorStore\n", - "\n", - "# set the index name for this example\n", - "index_name = \"ragas_ex\"\n", - "\n", - "# construct the vector store class from texts and metadata\n", - "rds = RedisVectorStore.from_documents(\n", - " chunks,\n", - " embeddings,\n", - " index_name=index_name,\n", - " redis_url=REDIS_URL,\n", - " metadata_schema=[\n", - " {\n", - " \"name\": \"source\",\n", - " 
\"type\": \"text\"\n", - " },\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test the vector store" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. 
For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'" - ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rds.similarity_search(\"What was nike's revenue last year?\")[0].page_content" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup RAG\n", - "\n", - "Now that the vector db is populated let's initialize our RAG app." 
- ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [], - "source": [ - "import getpass\n", - "from langchain_openai import ChatOpenAI\n", - "\n", - "if \"OPENAI_API_KEY\" not in os.environ:\n", - " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OPENAI_API_KEY\")\n", - "\n", - "llm = ChatOpenAI(\n", - " openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n", - " model=\"gpt-3.5-turbo-16k\",\n", - " max_tokens=None\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.prompts import ChatPromptTemplate\n", - "\n", - "system_prompt = \"\"\"\n", - " Use the following pieces of context from financial 10k filings data to answer the user question at the end. \n", - " If you don't know the answer, say that you don't know, don't try to make up an answer.\n", - "\n", - " Context:\n", - " ---------\n", - " {context}\n", - "\"\"\"\n", - "\n", - "def format_docs(docs):\n", - " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", - "\n", - "prompt = ChatPromptTemplate.from_messages(\n", - " [\n", - " (\"system\", system_prompt),\n", - " (\"human\", \"{input}\")\n", - " ]\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test it out" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'input': \"What was nike's revenue last year?\",\n", - " 'context': [Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. 
The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). 
As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", - " Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"NIKE, INC. CONSOLIDATED STATEMENTS OF INCOME\\n\\n(In millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense NET INCOME\\n\\nEarnings per common share:\\n\\nBasic Diluted\\n\\nWeighted average common shares outstanding:\\n\\nBasic Diluted\\n\\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n\\n$\\n\\n$\\n\\n$ $\\n\\nYEAR ENDED MAY 31,\\n\\n2023\\n\\n2022\\n\\n2021\\n\\n51,217 $ 28,925\\n\\n46,710 $ 25,231\\n\\n44,538 24,576\\n\\n22,292 4,060 12,317\\n\\n21,479 3,850 10,954\\n\\n19,962 3,114 9,911\\n\\n16,377 (6)\\n\\n14,804 205\\n\\n13,025 262\\n\\n(280) 6,201\\n\\n(181) 6,651\\n\\n14 6,661\\n\\n1,131 5,070 $\\n\\n605 6,046 $\\n\\n934 5,727\\n\\n3.27 $ 3.23 $\\n\\n3.83 $ 3.75 $\\n\\n3.64 3.56\\n\\n1,551.6 1,569.8\\n\\n1,578.8 1,610.8\\n\\n1,573.0 1,609.4\\n\\n2023 FORM 10-K 55\\n\\nTable of Contents\\n\\nNIKE, INC. 
CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME\\n\\nYEAR ENDED MAY 31,\\n\\n(Dollars in millions)\\n\\n2023\\n\\n2022\\n\\nNet income Other comprehensive income (loss), net of tax:\\n\\n$\\n\\n5,070 $\\n\\n6,046 $\\n\\nChange in net foreign currency translation adjustment\\n\\n267\\n\\n(522)\\n\\nChange in net gains (losses) on cash flow hedges Change in net gains (losses) on other\\n\\n(348) (6)\\n\\n1,214 6\\n\\nTotal other comprehensive income (loss), net of tax TOTAL COMPREHENSIVE INCOME\\n\\n$\\n\\n(87) 4,983 $\\n\\n698 6,744 $\\n\\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n\\n2023 FORM 10-K 56\\n\\n2021\\n\\n5,727\\n\\n496\\n\\n(825) 5\\n\\n(324) 5,403\\n\\nTable of Contents\\n\\nNIKE, INC. CONSOLIDATED BALANCE SHEETS\\n\\n(In millions)\\n\\nASSETS\\n\\nCurrent assets:\\n\\nCash and equivalents Short-term investments\\n\\nAccounts receivable, net Inventories Prepaid expenses and other current assets\\n\\nTotal current assets\\n\\nProperty, plant and equipment, net\\n\\nOperating lease right-of-use assets, net Identifiable intangible assets, net Goodwill\\n\\nDeferred income taxes and other assets\\n\\nTOTAL ASSETS\\n\\nLIABILITIES AND SHAREHOLDERS' EQUITY Current liabilities:\\n\\nCurrent portion of long-term debt Notes payable Accounts payable\\n\\nCurrent portion of operating lease liabilities Accrued liabilities Income taxes payable\\n\\nTotal current liabilities\\n\\nLong-term debt\\n\\nOperating lease liabilities Deferred income taxes and other liabilities Commitments and contingencies (Note 16)\\n\\nRedeemable preferred stock Shareholders' equity: Common stock at stated value:\"),\n", - " Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Tax (expense) benefit Gain (loss) net of tax\\n\\n5 (14)\\n\\n(9) 22\\n\\nTotal net gain (loss) reclassified for the period\\n\\n$\\n\\n463 $\\n\\n30\\n\\n2023 FORM 10-K 82\\n\\nTable of Contents\\n\\nNOTE 14 — 
REVENUES\\n\\nDISAGGREGATION OF REVENUES The following tables present the Company's Revenues disaggregated by reportable operating segment, major product line and distribution channel:\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nYEAR ENDED MAY 31, 2023 ASIA PACIFIC & LATIN (1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nAMERICA\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear\\n\\n$\\n\\n14,897 $\\n\\n8,260 $\\n\\n5,435 $\\n\\n4,543 $\\n\\n— $\\n\\n33,135 $\\n\\n2,155 $\\n\\n— $\\n\\n35,290\\n\\nApparel Equipment Other\\n\\n5,947 764 —\\n\\n4,566 592 —\\n\\n1,666 147 —\\n\\n1,664 224 —\\n\\n— — 58\\n\\n13,843 1,727 58\\n\\n90 28 154\\n\\n— — 27\\n\\n13,933 1,755 239\\n\\nTOTAL REVENUES\\n\\n$\\n\\n21,608 $\\n\\n13,418 $\\n\\n7,248 $\\n\\n6,431 $\\n\\n58 $\\n\\n48,763 $\\n\\n2,427 $\\n\\n27 $\\n\\n51,217\\n\\nRevenues by:\\n\\nSales to Wholesale Customers Sales through Direct to Consumer\\n\\n$\\n\\n11,273 $ 10,335\\n\\n8,522 $ 4,896\\n\\n3,866 $ 3,382\\n\\n3,736 $ 2,695\\n\\n— $ —\\n\\n27,397 $ 21,308\\n\\n1,299 $ 974\\n\\n— $ —\\n\\n28,696 22,282\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n21,608 $\\n\\n—\\n\\n13,418 $\\n\\n— 7,248 $\\n\\n— 6,431 $\\n\\n58 58 $\\n\\n58\\n\\n48,763 $\\n\\n154 2,427 $\\n\\n27 27 $\\n\\n239 51,217\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand businesses in its CASA territory to third-party distributors.\\n\\nYEAR ENDED MAY 31, 2022\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nASIA PACIFIC & LATIN AMERICA\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear Apparel\\n\\n$\\n\\n12,228 $ 5,492\\n\\n7,388 $ 4,527\\n\\n5,416 $ 1,938\\n\\n4,111 $ 1,610\\n\\n— $ —\\n\\n29,143 $ 13,567\\n\\n2,094 $ 103\\n\\n— $ —\\n\\n31,237 
13,670\\n\\nEquipment Other\\n\\n633 —\\n\\n564 —\\n\\n193 —\\n\\n234 —\\n\\n— 102\\n\\n1,624 102\\n\\n26 123\\n\\n— (72)\\n\\n1,650 153\\n\\nTOTAL REVENUES Revenues by:\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\nSales to Wholesale Customers Sales through Direct to Consumer Other\\n\\n$\\n\\n9,621 $ 8,732 —\\n\\n8,377 $ 4,102 —\\n\\n4,081 $ 3,466 —\\n\\n3,529 $ 2,426 —\\n\\n— $ — 102\\n\\n25,608 $ 18,726 102\\n\\n1,292 $ 931 123\\n\\n— $ — (72)\\n\\n26,900 19,657 153\\n\\nTOTAL REVENUES\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\n2023 FORM 10-K 83\\n\\nTable of Contents\\n\\nYEAR ENDED MAY 31, 2021\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\"),\n", - " Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"ASIA PACIFIC & LATIN AMERICA\\n\\n(1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE BRAND\\n\\nCONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by:\\n\\nFootwear Apparel Equipment\\n\\n$\\n\\n11,644 $ 5,028 507\\n\\n6,970 $ 3,996 490\\n\\n5,748 $ 2,347 195\\n\\n3,659 $ 1,494 190\\n\\n— $ — —\\n\\n28,021 $ 12,865 1,382\\n\\n1,986 $ 104 29\\n\\n— $ — —\\n\\n30,007 12,969 1,411\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n17,179 $\\n\\n—\\n\\n11,456 $\\n\\n— 8,290 $\\n\\n— 5,343 $\\n\\n25 25 $\\n\\n25\\n\\n42,293 $\\n\\n86 2,205 $\\n\\n40 40 $\\n\\n151 44,538\\n\\nRevenues by:\\n\\nSales to Wholesale Customers $\\n\\n10,186 $\\n\\n7,812 $\\n\\n4,513 $\\n\\n3,387 $\\n\\n— $\\n\\n25,898 $\\n\\n1,353 $\\n\\n— $\\n\\n27,251\\n\\nSales through Direct to Consumer Other\\n\\n6,993 —\\n\\n3,644 —\\n\\n3,777 —\\n\\n1,956 —\\n\\n— 25\\n\\n16,370 25\\n\\n766 86\\n\\n— 40\\n\\n17,136 151\\n\\nTOTAL REVENUES\\n\\n$\\n\\n17,179 $\\n\\n11,456 $\\n\\n8,290 $\\n\\n5,343 $\\n\\n25 $\\n\\n42,293 $\\n\\n2,205 $\\n\\n40 
$\\n\\n44,538\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand business in Brazil to a third-party distributor.\\n\\nFor the fiscal years ended May 31, 2023, 2022 and 2021, Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment. Converse Other revenues were primarily attributable to licensing businesses. Corporate revenues primarily consisted of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse but managed through the Company's central foreign exchange risk management program.\\n\\nAs of May 31, 2023 and 2022, the Company did not have any contract assets and had an immaterial amount of contract liabilities recorded in Accrued liabilities on the Consolidated Balance Sheets.\\n\\nSALES-RELATED RESERVES\\n\\nAs of May 31, 2023 and 2022, the Company's sales-related reserve balance, which includes returns, post-invoice sales discounts and miscellaneous claims, was $994 million and $1,015 million, respectively, recorded in Accrued liabilities on the Consolidated Balance Sheets. 
The estimated cost of inventory for expected product returns was $226 million and $194 million as of May 31, 2023 and 2022, respectively, and was recorded in Prepaid expenses and other current assets on the Consolidated Balance Sheets.\\n\\nNOTE 15 — OPERATING SEGMENTS AND RELATED INFORMATION\")],\n", - " 'answer': \"Nike's revenue last year was $51,217 million.\"}" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.chains import create_retrieval_chain\n", - "from langchain.chains.combine_documents import create_stuff_documents_chain\n", - "\n", - "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n", - "rag_chain = create_retrieval_chain(rds.as_retriever(), question_answer_chain)\n", - "\n", - "rag_chain.invoke({\"input\": \"What was nike's revenue last year?\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## (Optional) Creating a test set\n", - "\n", - "Now that our setup is complete and we have our RAG app to evaluate we need a test set to evaluate against. The ragas library provides a helpful class for generating a synthetic test set given our data as input that we will use here. The output of this generation is a set of `questions`, `contexts`, and `ground_truth`. \n", - "\n", - "The questions are generated by an LLM based on slices of context from the provided doc and the ground_truth is determined via a critic LLM. Note there is nothing special about this data itself and you can provide your own `questions` and `ground_truth` for evaluation purposes. 
When starting a project however, there is often a lack of quality human labeled data to be used for evaluation and a synthetic dataset is a valuable place to start if pre live user/process data (which should be incorporated as an ultimate goal).\n", - "\n", - "For more detail see [the docs](https://docs.ragas.io/en/stable/concepts/testset_generation.html)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# NBVAL_SKIP\n", - "# source: https://docs.ragas.io/en/latest/getstarted/testset_generation.html\n", - "from ragas.testset.generator import TestsetGenerator\n", - "from ragas.testset.evolutions import simple, reasoning, multi_context\n", - "from ragas.run_config import RunConfig\n", - "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", - "\n", - "run_config = RunConfig(\n", - " timeout=200,\n", - " max_wait=160,\n", - " max_retries=3,\n", - ")\n", - "\n", - "# generator with openai models\n", - "generator_llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\")\n", - "critic_llm = ChatOpenAI(model=\"gpt-4o-mini\")\n", - "embeddings = OpenAIEmbeddings()\n", - "\n", - "generator = TestsetGenerator.from_langchain(\n", - " generator_llm,\n", - " critic_llm,\n", - " embeddings,\n", - " run_config=run_config,\n", - ")\n", - "\n", - "testset = generator.generate_with_langchain_docs(\n", - " chunks,\n", - " test_size=10,\n", - " distributions={\n", - " simple: 0.5,\n", - " reasoning: 0.25,\n", - " multi_context: 0.25\n", - " },\n", - " run_config=run_config\n", - ")\n", - "\n", - "# save to csv since this can be a time consuming process\n", - "testset.to_pandas().to_csv(\"resources/new_testset.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Evaluation helper functions\n", - "\n", - "The following code takes a RetrievalQA chain, testset dataframe, and the metrics to be evaluated and returns a dataframe including the metrics calculated." 
- ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from datasets import Dataset\n", - "from ragas import evaluate\n", - "from ragas.run_config import RunConfig\n", - "\n", - "def parse_contexts(source_docs):\n", - " return [doc.page_content for doc in source_docs]\n", - "\n", - "def create_evaluation_dataset(chain, testset):\n", - " res_set = {\n", - " \"question\": [],\n", - " \"answer\": [],\n", - " \"contexts\": [],\n", - " \"ground_truth\": []\n", - " }\n", - "\n", - " for _, row in testset.iterrows():\n", - " result = chain.invoke({\"input\": row[\"question\"]})\n", - "\n", - " res_set[\"question\"].append(row[\"question\"])\n", - " res_set[\"answer\"].append(result[\"answer\"])\n", - "\n", - " contexts = parse_contexts(result[\"context\"])\n", - "\n", - " if not len(contexts):\n", - " print(f\"no contexts found for question: {row['question']}\")\n", - " res_set[\"contexts\"].append(contexts)\n", - " res_set[\"ground_truth\"].append(str(row[\"ground_truth\"]))\n", - "\n", - " return Dataset.from_dict(res_set)\n", - "\n", - "def evaluate_dataset(eval_dataset, metrics, llm, embeddings):\n", - "\n", - " run_config = RunConfig(max_retries=1) # see ragas docs for more run_config options\n", - "\n", - " eval_result = evaluate(\n", - " eval_dataset,\n", - " metrics=metrics,\n", - " run_config=run_config,\n", - " llm=llm,\n", - " embeddings=embeddings\n", - " )\n", - "\n", - " eval_df = eval_result.to_pandas()\n", - " return eval_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create the evaluation data\n", - "\n", - "Input: chain to be evaluated and a pregenerated test set
\n", - "Output: dataset formatted for use with ragas evaluation function" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questioncontextsground_truthevolution_typemetadataepisode_done
0What are short-term investments and how are th...[\"CASH AND EQUIVALENTS Cash and equivalents re...Short-term investments are highly liquid inves...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
1What are some of the risks and uncertainties a...['Our NIKE Direct operations, including our re...Many factors unique to retail operations, some...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
2What is NIKE's policy regarding securities ana...[\"Investors should also be aware that while NI...NIKE's policy is to not disclose any material ...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
3What are the revenues for the Footwear and App...['(Dollars in millions, except per share data)...The revenues for the Footwear and Apparel cate...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
4How do master netting arrangements impact the ...[\"The Company records the assets and liabiliti...The Company records the assets and liabilities...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
\n", - "
" - ], - "text/plain": [ - " question \\\n", - "0 What are short-term investments and how are th... \n", - "1 What are some of the risks and uncertainties a... \n", - "2 What is NIKE's policy regarding securities ana... \n", - "3 What are the revenues for the Footwear and App... \n", - "4 How do master netting arrangements impact the ... \n", - "\n", - " contexts \\\n", - "0 [\"CASH AND EQUIVALENTS Cash and equivalents re... \n", - "1 ['Our NIKE Direct operations, including our re... \n", - "2 [\"Investors should also be aware that while NI... \n", - "3 ['(Dollars in millions, except per share data)... \n", - "4 [\"The Company records the assets and liabiliti... \n", - "\n", - " ground_truth evolution_type \\\n", - "0 Short-term investments are highly liquid inves... simple \n", - "1 Many factors unique to retail operations, some... simple \n", - "2 NIKE's policy is to not disclose any material ... simple \n", - "3 The revenues for the Footwear and Apparel cate... simple \n", - "4 The Company records the assets and liabilities... 
simple \n", - "\n", - " metadata episode_done \n", - "0 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", - "1 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", - "2 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", - "3 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", - "4 [{'source': 'resources/nke-10k-2023.pdf'}] True " - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "testset_df = pd.read_csv(\"resources/testset_15.csv\")\n", - "testset_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [], - "source": [ - "eval_dataset = create_evaluation_dataset(rag_chain, testset_df)\n", - "eval_dataset.to_pandas().shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluate generation metrics\n", - "Generation metrics quantify how well the RAG app did creating answers to the provided questions (i.e. the G in **R**etrival **A**ugments **G**eneration). 
We will calculate the generation metrics **faithfulness** and **answer relevancy** for this example.\n", - "\n", - "The ragas libary conveniently abstracts the calculation of these metrics so we don't have to write redundant code but please review the following definitions in order to build intuition around what these metrics actually measure.\n", - "\n", - "Note: the following examples are paraphrased from the [ragas docs](https://docs.ragas.io/en/stable/concepts/metrics/index.html)\n", - "\n", - "------\n", - "\n", - "### Faithfulness\n", - "\n", - "An answer to a question can be said to be \"faithful\" if the **claims** that are made in the answer **can be inferred** from the **context**.\n", - "\n", - "#### Mathematically:\n", - "\n", - "$$\n", - "Faithfullness\\ score = \\frac{Number\\ of\\ claims\\ in\\ the\\ generated\\ answer\\ that\\ can\\ be\\ inferred\\ from\\ the\\ given\\ context}{Total\\ number\\ of\\ claim\\ in\\ the\\ generated\\ answer}\n", - "$$\n", - "\n", - "#### Example process:\n", - "\n", - "> Question: Where and when was Einstein born?\n", - "> \n", - "> Context: Albert Einstein (born 14 March 1879) was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time\n", - ">\n", - "> answer: Einstein was born in Germany on 20th March 1879.\n", - "\n", - "Step 1: Use LLM to break generated answer into individual statements.\n", - "- “Einstein was born in Germany.”\n", - "- “Einstein was born on 20th March 1879.”\n", - "\n", - "Step 2: For each statement use LLM to verify if it can be inferred from the context.\n", - "- “Einstein was born in Germany.” => yes. 
\n", - "- “Einstein was born on 20th March 1879.” => no.\n", - "\n", - "Step 3: plug into formula\n", - "\n", - "Number of claims inferred from context = 1\n", - "Total number of claims = 2\n", - "Faithfulness = 1/2\n", - "\n", - "### Answer Relevance\n", - "\n", - "An answer can be said to be relevant if it directly addresses the question (intuitively).\n", - "\n", - "#### Example process:\n", - "\n", - "1. Use an LLM to generate \"hypothetical\" questions to a given answer with the following prompt:\n", - "\n", - " > Generate a question for the given answer.\n", - " > answer: [answer]\n", - "\n", - "2. Embed the generated \"hypothetical\" questions as vectors.\n", - "3. Calculate the cosine similarity of the hypothetical questions and the original question, sum those similarities, and divide by n.\n", - "\n", - "With data:\n", - "\n", - "> Question: Where is France and what is it’s capital?\n", - "> \n", - "> answer: France is in western Europe.\n", - "\n", - "Step 1 - use LLM to create 'n' variants of question from the generated answer.\n", - "\n", - "- “In which part of Europe is France located?”\n", - "- “What is the geographical location of France within Europe?”\n", - "- “Can you identify the region of Europe where France is situated?”\n", - "\n", - "Step 2 - Calculate the mean cosine similarity between the generated questions and the actual question.\n", - "\n", - "## Now let's implement using our helper functions\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "dd9cabb4b0c448b08cad96d2ef3391a2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Evaluating: 0%| | 0/15 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
faithfulnessanswer_relevancy
count15.00000015.000000
mean0.7812290.938581
std0.3626660.085342
min0.0000000.736997
25%0.6527780.926596
50%1.0000000.975230
75%1.0000000.994168
max1.0000001.000000
\n", - "" - ], - "text/plain": [ - " faithfulness answer_relevancy\n", - "count 15.000000 15.000000\n", - "mean 0.781229 0.938581\n", - "std 0.362666 0.085342\n", - "min 0.000000 0.736997\n", - "25% 0.652778 0.926596\n", - "50% 1.000000 0.975230\n", - "75% 1.000000 0.994168\n", - "max 1.000000 1.000000" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gen_metrics_default = faithfulness_metrics\n", - "gen_metrics_default[\"answer_relevancy\"] = answer_relevancy_metrics[\"answer_relevancy\"]\n", - "\n", - "gen_metrics_default.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluating retrieval metrics\n", - "\n", - "Retrieval metrics quantify how well the system performed at fetching the best possible context for generation. Like before please review the definitions below to understand what happens under-the-hood when we execute the evaluation code. \n", - "\n", - "-----\n", - "\n", - "### Context Relevance\n", - "\n", - "\"The context is considered relevant to the extent that it exclusively contains information that is needed to answer the question.\"\n", - "\n", - "#### Example process:\n", - "\n", - "1. Use the following LLM prompt to extract a subset of sentences necessary to answer the question. The context is defined as the formatted search result from the vector database.\n", - "\n", - " > Please extract relevant sentences from\n", - " > the provided context that can potentially\n", - " > help answer the following `{question}`. If no\n", - " > relevant sentences are found, or if you\n", - " > believe the question cannot be answered\n", - " > from the given context, return the phrase\n", - " > \"Insufficient Information\". While extracting candidate sentences you’re not allowed to make any changes to sentences\n", - " > from given `{context}`.\n", - "\n", - "2. 
Compute the context relevance score = (number of extracted sentences) / (total number of sentences in context)\n", - "\n", - "Moving from the initial paper to the active evaluation library ragas there are a few more insightful metrics to evaluate. From the library [source](https://docs.ragas.io/en/stable/concepts/metrics/index.html) let's introduce `context precision` and `context recall`. \n", - "\n", - "### Context recall\n", - "Context can be said to have high recall if retrieved context aligns with the ground truth answer.\n", - "\n", - "#### Mathematically:\n", - "\n", - "$$\n", - "Context\\ recall = \\frac{Ground\\ Truth\\ sentences\\ that\\ can\\ be\\ attributed\\ to\\ context}{Total\\ number\\ of\\ sentences\\ in\\ the\\ ground\\ truth}\n", - "$$\n", - "\n", - "#### Example process:\n", - "\n", - "Data:\n", - "> question: Where is France and what is it’s capital?\n", - "> ground truth answer: France is in Western Europe and its capital is Paris.\n", - "> context: France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. The country is also renowned for its wines and sophisticated cuisine. Lascaux’s ancient cave drawings, Lyon’s Roman theater and the vast Palace of Versailles attest to its rich history.\n", - ">\n", - "> Note: ground truth answer can be created by critic LLM or with own human labeled data set.\n", - "\n", - "Step 1 - use an LLM to break the ground truth down into individual statements:\n", - "- `France is in Western Europe`\n", - "- `Its capital is Paris`\n", - "\n", - "Step 2 - for each ground truth statement, use an LLM to determine if it can be attributed from the context.\n", - "- `France is in Western Europe` => yes\n", - "- `Its capital is Paris` => no\n", - "\n", - "\n", - "Step 3 - plug in to formula\n", - "\n", - "context recall = (1 + 0) / 2 = 0.5\n", - "\n", - "### Context precision\n", - "\n", - "This metrics relates to how chunks are ranked in a response. 
Ideally the most relevant chunks are at the top.\n", - "\n", - "#### Mathematically:\n", - "\n", - "$$\n", - "Context\\ Precision@k = \\frac{precision@k}{total\\ number\\ relevant\\ items\\ in\\ the\\ top\\ k\\ results}\n", - "$$\n", - "\n", - "$$\n", - "Precision@k = \\frac{true\\ positive@k}{true\\ positives@k + false\\ positives@k}\n", - "$$\n", - "\n", - "#### Example process:\n", - "\n", - "Data:\n", - "> Question: Where is France and what is it’s capital?\n", - "> \n", - "> Ground truth: France is in Western Europe and its capital is Paris.\n", - "> \n", - "> Context: [ “The country is also renowned for its wines and sophisticated cuisine. Lascaux’s ancient cave drawings, Lyon’s Roman theater and”, “France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. Paris, its capital, is famed for its fashion houses, classical art museums including the Louvre and monuments like the Eiffel Tower”]\n", - "\n", - "Step 1 - for each chunk use the LLM to check if it's relevant or not to the ground truth answer.\n", - "\n", - "Step 2 - for each chunk in the context calculate the precision defined as: ``\n", - "- `“The country is also renowned for its wines and sophisticated cuisine. Lascaux’s ancient cave drawings, Lyon’s Roman theater and”` => precision = 0/1 or 0.\n", - "- `“France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. Paris, its capital, is famed for its fashion houses, classical art museums including the Louvre and monuments like the Eiffel Tower”` => the precision would be (1) / (1 true positive + 1 false positive) = 0.5. 
\n", - "\n", - "\n", - "Step 3 - calculate the overall context precision = (0 + 0.5) / 1 = 0.5" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c076c3dc42cf49cf8d768dec225727d5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Evaluating: 0%| | 0/15 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
context_recallcontext_precision
count15.00000015.000000
mean0.9666670.925926
std0.1290990.145352
min0.5000000.500000
25%1.0000000.916667
50%1.0000001.000000
75%1.0000001.000000
max1.0000001.000000
\n", - "" - ], - "text/plain": [ - " context_recall context_precision\n", - "count 15.000000 15.000000\n", - "mean 0.966667 0.925926\n", - "std 0.129099 0.145352\n", - "min 0.500000 0.500000\n", - "25% 1.000000 0.916667\n", - "50% 1.000000 1.000000\n", - "75% 1.000000 1.000000\n", - "max 1.000000 1.000000" - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ret_metrics_default = context_recall_metrics\n", - "ret_metrics_default[\"context_precision\"] = context_precision_metrics[\"context_precision\"]\n", - "\n", - "ret_metrics_default.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [], - "source": [ - "metrics = ret_metrics_default\n", - "metrics[\"faithfulness\"] = gen_metrics_default[\"faithfulness\"]\n", - "metrics[\"answer_relevancy\"] = gen_metrics_default[\"answer_relevancy\"]\n", - "\n", - "metrics.to_csv(f\"resources/metrics_{CHUNK_SIZE}_{CHUNK_OVERLAP}.csv\", index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# All together" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
context_recallcontext_precisionfaithfulnessanswer_relevancy
count15.00000015.00000015.00000015.000000
mean0.9666670.9259260.7812290.938581
std0.1290990.1453520.3626660.085342
min0.5000000.5000000.0000000.736997
25%1.0000000.9166670.6527780.926596
50%1.0000001.0000001.0000000.975230
75%1.0000001.0000001.0000000.994168
max1.0000001.0000001.0000001.000000
\n", - "
" - ], - "text/plain": [ - " context_recall context_precision faithfulness answer_relevancy\n", - "count 15.000000 15.000000 15.000000 15.000000\n", - "mean 0.966667 0.925926 0.781229 0.938581\n", - "std 0.129099 0.145352 0.362666 0.085342\n", - "min 0.500000 0.500000 0.000000 0.736997\n", - "25% 1.000000 0.916667 0.652778 0.926596\n", - "50% 1.000000 1.000000 1.000000 0.975230\n", - "75% 1.000000 1.000000 1.000000 0.994168\n", - "max 1.000000 1.000000 1.000000 1.000000" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metrics.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Analysis\n", - "Overall our RAG app showed pretty good performance. All values indicated above 0.6, which from anecdotal experience, is a reasonable lower-bound for performance however obviously higher values are more ideal. It is worth noting that generation metrics can be a bit more hazy in terms of ideal ranges since the LLM evaluation cannot yet capture the way a response feels to a user. 
For these metrics it's important to make sure they are not severely low however blind optimization to the top can result in a very uncreative chat experience which may or may not be ideal for the intended use case.\n", - "\n", - "## Review\n", - "\n", - "- we initialized our RAG app with data from a 10k document\n", - "- generated a testset to evaluate \n", - "- calculated both retrieval and generation metrics\n", - "\n", - "## Next steps\n", - "\n", - "Now that we know how to measure our system we can quickly and easily experiment with different techniques with a baseline in place to improve our systems.\n", - "\n", - "## Cleanup" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.index import SearchIndex\n", - "\n", - "idx = SearchIndex.from_existing(\n", - " index_name,\n", - " redis_url=REDIS_URL\n", - ")\n", - "\n", - "idx.delete()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "# Evaluating RAG\n", + "\n", + "This notebook uses the [ragas library](https://docs.ragas.io/en/stable/getstarted/index.html) and [Redis](https://redis.com) to evaluate the performance of a sample RAG application. 
Also see the original [source paper](https://arxiv.org/pdf/2309.15217) to build a more detailed understanding.\n", + "\n", + "## Let's Begin!\n", + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To start, we need a RAG app to evaluate. Let's create one using LangChain and connect it with Redis as the vector DB.\n", + "\n", + "## Init redis, data prep, and populating the vector DB" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -q redis \"unstructured[pdf]\" sentence-transformers langchain \"langchain-redis>=0.2.0\" langchain-huggingface langchain-openai ragas datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Running Redis in Colab\n", + "Use the shell script below to download, extract, and install [Redis Stack](https://redis.io/docs/getting-started/install-stack/) directly from the Redis package archive." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sh\n", + "# NBVAL_SKIP\n", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", + "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", + "sudo apt-get update > /dev/null 2>&1\n", + "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", + "redis-stack-server --daemonize yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For Alternative Environments\n", + "There are many ways to get the necessary redis-stack instance running\n", + "1. On cloud, deploy a [FREE instance of Redis in the cloud](https://redis.com/try-free/). Or, if you have your\n", + "own version of Redis Enterprise running, that works too!\n", + "2. Per OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n", + "3. 
With docker: `docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Replace values below with your own if using Redis Cloud instance\n", + "REDIS_HOST = os.getenv(\"REDIS_HOST\", \"localhost\") # ex: \"redis-18374.c253.us-central1-1.gce.cloud.redislabs.com\"\n", + "REDIS_PORT = os.getenv(\"REDIS_PORT\", \"6379\") # ex: 18374\n", + "REDIS_PASSWORD = os.getenv(\"REDIS_PASSWORD\", \"\") # ex: \"1TNxTEdYRDgIDKM2gDfasupCADXXXX\"\n", + "\n", + "# If SSL is enabled on the endpoint, use rediss:// as the URL prefix\n", + "REDIS_URL = f\"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_community.document_loaders import PyPDFLoader\n", + "\n", + "CHUNK_SIZE = 2500\n", + "CHUNK_OVERLAP = 0\n", + "\n", + "# pdf to load\n", + "path = 'resources/nke-10k-2023.pdf'\n", + "assert os.path.exists(path), f\"File not found: {path}\"\n", + "\n", + "# load and split\n", + "loader = PyPDFLoader(path)\n", + "pages = loader.load()\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)\n", + "chunks = text_splitter.split_documents(pages)\n", + "\n", + "print(\"Done preprocessing. Created\", len(chunks), \"chunks of the original pdf\", path)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Table of ContentsUNITED STATESSECURITIES AND EXCHANGE COMMISSIONWashington, D.C. 
20549FORM 10-K(Mark One)☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934FOR THE FISCAL YEAR ENDED MAY 31, 2023OR☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934FOR THE TRANSITION PERIOD FROM TO .Commission File No. 1-10635\\n\\nAs of November 30, 2022, the aggregate market values of the Registrant's Common Stock held by non-affiliates were:Class A$7,831,564,572 Class B136,467,702,472 $144,299,267,044\\n\\nNIKE, Inc.(Exact name of Registrant as specified in its charter)Oregon93-0584541(State or other jurisdiction of incorporation)(IRS Employer Identification No.)One Bowerman Drive, Beaverton, Oregon 97005-6453(Address of principal executive offices and zip code)(503) 671-6453(Registrant's telephone number, including area code)SECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:Class B Common StockNKENew York Stock Exchange(Title of each class)(Trading symbol)(Name of each exchange on which registered)SECURITIES REGISTERED PURSUANT TO SECTION 12(G) OF THE ACT:NONE\")" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chunks[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_huggingface import HuggingFaceEmbeddings\n", + "\n", + "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_redis import RedisVectorStore\n", + "\n", + "# set the index name for this example\n", + "index_name = \"ragas_ex\"\n", + "\n", + "# construct the vector store class from texts and metadata\n", + "rds = RedisVectorStore.from_documents(\n", + " chunks,\n", + " embeddings,\n", + " index_name=index_name,\n", + " redis_url=REDIS_URL,\n", + " metadata_schema=[\n", + " {\n", + " \"name\": \"source\",\n", + " 
\"type\": \"text\"\n", + " },\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test the vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. 
For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rds.similarity_search(\"What was nike's revenue last year?\")[0].page_content" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup RAG\n", + "\n", + "Now that the vector db is populated let's initialize our RAG app." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "if \"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OPENAI_API_KEY\")\n", + "\n", + "llm = ChatOpenAI(\n", + " openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n", + " model=\"gpt-3.5-turbo-16k\",\n", + " max_tokens=None\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "system_prompt = \"\"\"\n", + " Use the following pieces of context from financial 10k filings data to answer the user question at the end. \n", + " If you don't know the answer, say that you don't know, don't try to make up an answer.\n", + "\n", + " Context:\n", + " ---------\n", + " {context}\n", + "\"\"\"\n", + "\n", + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", system_prompt),\n", + " (\"human\", \"{input}\")\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test it out" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input': \"What was nike's revenue last year?\",\n", + " 'context': [Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content='As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company\\'s internal organization. 
The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity.\\n\\nThe breakdown of Revenues is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023 FISCAL 2022\\n\\n% CHANGE\\n\\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\\n\\n% CHANGE\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n21,608 $ 13,418 7,248\\n\\n18,353 12,479 7,547\\n\\n18 % 8 % -4 %\\n\\n18 % $ 21 % 4 %\\n\\n17,179 11,456 8,290\\n\\n7 % 9 % -9 %\\n\\nAsia Pacific & Latin America Global Brand Divisions\\n\\n(3)\\n\\n(2)\\n\\n6,431 58\\n\\n5,955 102\\n\\n8 % -43 %\\n\\n17 % -43 %\\n\\n5,343 25\\n\\n11 % 308 %\\n\\nTOTAL NIKE BRAND Converse\\n\\n$\\n\\n48,763 $ 2,427\\n\\n44,436 2,346\\n\\n10 % 3 %\\n\\n16 % $ 8 %\\n\\n42,293 2,205\\n\\n5 % 6 %\\n\\n(4)\\n\\nCorporate TOTAL NIKE, INC. REVENUES\\n\\n$\\n\\n27\\n\\n51,217 $\\n\\n(72) 46,710\\n\\n— 10 %\\n\\n— 16 % $\\n\\n40 44,538\\n\\n— 5 %\\n\\n(1) The percent change excluding currency changes represents a non-GAAP financial measure. For further information, see \"Use of Non-GAAP Financial Measures\".\\n\\n(2) For additional information on the transition of our NIKE Brand businesses within our CASA territory to a third-party distributor, see Note 18 — Acquisitions and Divestitures of the Notes to Consolidated\\n\\nFinancial Statements contained in Item 8 of this Annual Report.\\n\\n(3) Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment.\\n\\n(4) Corporate revenues primarily consist of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse, but\\n\\nmanaged through our central foreign exchange risk management program.\\n\\nThe primary financial measure used by the Company to evaluate performance is Earnings Before Interest and Taxes (\"EBIT\"). 
As discussed in Note 15 — Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, certain corporate costs are not included in EBIT.\\n\\nThe breakdown of EBIT is as follows:\\n\\n(Dollars in millions)\\n\\nFISCAL 2023\\n\\nFISCAL 2022\\n\\n% CHANGE\\n\\nFISCAL 2021\\n\\nNorth America Europe, Middle East & Africa Greater China\\n\\n$\\n\\n5,454 3,531 2,283\\n\\n$\\n\\n5,114 3,293 2,365\\n\\n7 % $ 7 % -3 %\\n\\n5,089 2,435 3,243\\n\\nAsia Pacific & Latin America Global Brand Divisions (1)'),\n", + " Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"NIKE, INC. CONSOLIDATED STATEMENTS OF INCOME\\n\\n(In millions, except per share data)\\n\\nRevenues Cost of sales\\n\\nGross profit\\n\\nDemand creation expense Operating overhead expense\\n\\nTotal selling and administrative expense\\n\\nInterest expense (income), net\\n\\nOther (income) expense, net Income before income taxes\\n\\nIncome tax expense NET INCOME\\n\\nEarnings per common share:\\n\\nBasic Diluted\\n\\nWeighted average common shares outstanding:\\n\\nBasic Diluted\\n\\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n\\n$\\n\\n$\\n\\n$ $\\n\\nYEAR ENDED MAY 31,\\n\\n2023\\n\\n2022\\n\\n2021\\n\\n51,217 $ 28,925\\n\\n46,710 $ 25,231\\n\\n44,538 24,576\\n\\n22,292 4,060 12,317\\n\\n21,479 3,850 10,954\\n\\n19,962 3,114 9,911\\n\\n16,377 (6)\\n\\n14,804 205\\n\\n13,025 262\\n\\n(280) 6,201\\n\\n(181) 6,651\\n\\n14 6,661\\n\\n1,131 5,070 $\\n\\n605 6,046 $\\n\\n934 5,727\\n\\n3.27 $ 3.23 $\\n\\n3.83 $ 3.75 $\\n\\n3.64 3.56\\n\\n1,551.6 1,569.8\\n\\n1,578.8 1,610.8\\n\\n1,573.0 1,609.4\\n\\n2023 FORM 10-K 55\\n\\nTable of Contents\\n\\nNIKE, INC. 
CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME\\n\\nYEAR ENDED MAY 31,\\n\\n(Dollars in millions)\\n\\n2023\\n\\n2022\\n\\nNet income Other comprehensive income (loss), net of tax:\\n\\n$\\n\\n5,070 $\\n\\n6,046 $\\n\\nChange in net foreign currency translation adjustment\\n\\n267\\n\\n(522)\\n\\nChange in net gains (losses) on cash flow hedges Change in net gains (losses) on other\\n\\n(348) (6)\\n\\n1,214 6\\n\\nTotal other comprehensive income (loss), net of tax TOTAL COMPREHENSIVE INCOME\\n\\n$\\n\\n(87) 4,983 $\\n\\n698 6,744 $\\n\\nThe accompanying Notes to the Consolidated Financial Statements are an integral part of this statement.\\n\\n2023 FORM 10-K 56\\n\\n2021\\n\\n5,727\\n\\n496\\n\\n(825) 5\\n\\n(324) 5,403\\n\\nTable of Contents\\n\\nNIKE, INC. CONSOLIDATED BALANCE SHEETS\\n\\n(In millions)\\n\\nASSETS\\n\\nCurrent assets:\\n\\nCash and equivalents Short-term investments\\n\\nAccounts receivable, net Inventories Prepaid expenses and other current assets\\n\\nTotal current assets\\n\\nProperty, plant and equipment, net\\n\\nOperating lease right-of-use assets, net Identifiable intangible assets, net Goodwill\\n\\nDeferred income taxes and other assets\\n\\nTOTAL ASSETS\\n\\nLIABILITIES AND SHAREHOLDERS' EQUITY Current liabilities:\\n\\nCurrent portion of long-term debt Notes payable Accounts payable\\n\\nCurrent portion of operating lease liabilities Accrued liabilities Income taxes payable\\n\\nTotal current liabilities\\n\\nLong-term debt\\n\\nOperating lease liabilities Deferred income taxes and other liabilities Commitments and contingencies (Note 16)\\n\\nRedeemable preferred stock Shareholders' equity: Common stock at stated value:\"),\n", + " Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"Tax (expense) benefit Gain (loss) net of tax\\n\\n5 (14)\\n\\n(9) 22\\n\\nTotal net gain (loss) reclassified for the period\\n\\n$\\n\\n463 $\\n\\n30\\n\\n2023 FORM 10-K 82\\n\\nTable of Contents\\n\\nNOTE 14 — 
REVENUES\\n\\nDISAGGREGATION OF REVENUES The following tables present the Company's Revenues disaggregated by reportable operating segment, major product line and distribution channel:\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nYEAR ENDED MAY 31, 2023 ASIA PACIFIC & LATIN (1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nAMERICA\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear\\n\\n$\\n\\n14,897 $\\n\\n8,260 $\\n\\n5,435 $\\n\\n4,543 $\\n\\n— $\\n\\n33,135 $\\n\\n2,155 $\\n\\n— $\\n\\n35,290\\n\\nApparel Equipment Other\\n\\n5,947 764 —\\n\\n4,566 592 —\\n\\n1,666 147 —\\n\\n1,664 224 —\\n\\n— — 58\\n\\n13,843 1,727 58\\n\\n90 28 154\\n\\n— — 27\\n\\n13,933 1,755 239\\n\\nTOTAL REVENUES\\n\\n$\\n\\n21,608 $\\n\\n13,418 $\\n\\n7,248 $\\n\\n6,431 $\\n\\n58 $\\n\\n48,763 $\\n\\n2,427 $\\n\\n27 $\\n\\n51,217\\n\\nRevenues by:\\n\\nSales to Wholesale Customers Sales through Direct to Consumer\\n\\n$\\n\\n11,273 $ 10,335\\n\\n8,522 $ 4,896\\n\\n3,866 $ 3,382\\n\\n3,736 $ 2,695\\n\\n— $ —\\n\\n27,397 $ 21,308\\n\\n1,299 $ 974\\n\\n— $ —\\n\\n28,696 22,282\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n21,608 $\\n\\n—\\n\\n13,418 $\\n\\n— 7,248 $\\n\\n— 6,431 $\\n\\n58 58 $\\n\\n58\\n\\n48,763 $\\n\\n154 2,427 $\\n\\n27 27 $\\n\\n239 51,217\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand businesses in its CASA territory to third-party distributors.\\n\\nYEAR ENDED MAY 31, 2022\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\\n\\nASIA PACIFIC & LATIN AMERICA\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE\\n\\nBRAND CONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by: Footwear Apparel\\n\\n$\\n\\n12,228 $ 5,492\\n\\n7,388 $ 4,527\\n\\n5,416 $ 1,938\\n\\n4,111 $ 1,610\\n\\n— $ —\\n\\n29,143 $ 13,567\\n\\n2,094 $ 103\\n\\n— $ —\\n\\n31,237 
13,670\\n\\nEquipment Other\\n\\n633 —\\n\\n564 —\\n\\n193 —\\n\\n234 —\\n\\n— 102\\n\\n1,624 102\\n\\n26 123\\n\\n— (72)\\n\\n1,650 153\\n\\nTOTAL REVENUES Revenues by:\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\nSales to Wholesale Customers Sales through Direct to Consumer Other\\n\\n$\\n\\n9,621 $ 8,732 —\\n\\n8,377 $ 4,102 —\\n\\n4,081 $ 3,466 —\\n\\n3,529 $ 2,426 —\\n\\n— $ — 102\\n\\n25,608 $ 18,726 102\\n\\n1,292 $ 931 123\\n\\n— $ — (72)\\n\\n26,900 19,657 153\\n\\nTOTAL REVENUES\\n\\n$\\n\\n18,353 $\\n\\n12,479 $\\n\\n7,547 $\\n\\n5,955 $\\n\\n102 $\\n\\n44,436 $\\n\\n2,346 $\\n\\n(72) $\\n\\n46,710\\n\\n2023 FORM 10-K 83\\n\\nTable of Contents\\n\\nYEAR ENDED MAY 31, 2021\\n\\n(Dollars in millions)\\n\\nNORTH AMERICA\\n\\nEUROPE, MIDDLE EAST & AFRICA\\n\\nGREATER CHINA\"),\n", + " Document(metadata={'source': 'resources/nke-10k-2023.pdf'}, page_content=\"ASIA PACIFIC & LATIN AMERICA\\n\\n(1)\\n\\nGLOBAL BRAND DIVISIONS\\n\\nTOTAL NIKE BRAND\\n\\nCONVERSE CORPORATE\\n\\nTOTAL NIKE, INC.\\n\\nRevenues by:\\n\\nFootwear Apparel Equipment\\n\\n$\\n\\n11,644 $ 5,028 507\\n\\n6,970 $ 3,996 490\\n\\n5,748 $ 2,347 195\\n\\n3,659 $ 1,494 190\\n\\n— $ — —\\n\\n28,021 $ 12,865 1,382\\n\\n1,986 $ 104 29\\n\\n— $ — —\\n\\n30,007 12,969 1,411\\n\\nOther\\n\\nTOTAL REVENUES\\n\\n$\\n\\n—\\n\\n17,179 $\\n\\n—\\n\\n11,456 $\\n\\n— 8,290 $\\n\\n— 5,343 $\\n\\n25 25 $\\n\\n25\\n\\n42,293 $\\n\\n86 2,205 $\\n\\n40 40 $\\n\\n151 44,538\\n\\nRevenues by:\\n\\nSales to Wholesale Customers $\\n\\n10,186 $\\n\\n7,812 $\\n\\n4,513 $\\n\\n3,387 $\\n\\n— $\\n\\n25,898 $\\n\\n1,353 $\\n\\n— $\\n\\n27,251\\n\\nSales through Direct to Consumer Other\\n\\n6,993 —\\n\\n3,644 —\\n\\n3,777 —\\n\\n1,956 —\\n\\n— 25\\n\\n16,370 25\\n\\n766 86\\n\\n— 40\\n\\n17,136 151\\n\\nTOTAL REVENUES\\n\\n$\\n\\n17,179 $\\n\\n11,456 $\\n\\n8,290 $\\n\\n5,343 $\\n\\n25 $\\n\\n42,293 $\\n\\n2,205 $\\n\\n40 
$\\n\\n44,538\\n\\n(1) Refer to Note 18 — Acquisitions and Divestitures for additional information on the transition of the Company's NIKE Brand business in Brazil to a third-party distributor.\\n\\nFor the fiscal years ended May 31, 2023, 2022 and 2021, Global Brand Divisions revenues include NIKE Brand licensing and other miscellaneous revenues that are not part of a geographic operating segment. Converse Other revenues were primarily attributable to licensing businesses. Corporate revenues primarily consisted of foreign currency hedge gains and losses related to revenues generated by entities within the NIKE Brand geographic operating segments and Converse but managed through the Company's central foreign exchange risk management program.\\n\\nAs of May 31, 2023 and 2022, the Company did not have any contract assets and had an immaterial amount of contract liabilities recorded in Accrued liabilities on the Consolidated Balance Sheets.\\n\\nSALES-RELATED RESERVES\\n\\nAs of May 31, 2023 and 2022, the Company's sales-related reserve balance, which includes returns, post-invoice sales discounts and miscellaneous claims, was $994 million and $1,015 million, respectively, recorded in Accrued liabilities on the Consolidated Balance Sheets. 
The estimated cost of inventory for expected product returns was $226 million and $194 million as of May 31, 2023 and 2022, respectively, and was recorded in Prepaid expenses and other current assets on the Consolidated Balance Sheets.\\n\\nNOTE 15 — OPERATING SEGMENTS AND RELATED INFORMATION\")],\n", + " 'answer': \"Nike's revenue last year was $51,217 million.\"}" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains import create_retrieval_chain\n", + "from langchain.chains.combine_documents import create_stuff_documents_chain\n", + "\n", + "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n", + "rag_chain = create_retrieval_chain(rds.as_retriever(), question_answer_chain)\n", + "\n", + "rag_chain.invoke({\"input\": \"What was nike's revenue last year?\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Optional) Creating a test set\n", + "\n", + "Now that our setup is complete and we have our RAG app to evaluate we need a test set to evaluate against. The ragas library provides a helpful class for generating a synthetic test set given our data as input that we will use here. The output of this generation is a set of `questions`, `contexts`, and `ground_truth`. \n", + "\n", + "The questions are generated by an LLM based on slices of context from the provided doc and the ground_truth is determined via a critic LLM. Note there is nothing special about this data itself and you can provide your own `questions` and `ground_truth` for evaluation purposes. 
When starting a project, however, there is often a lack of quality human-labeled data to use for evaluation, so a synthetic dataset is a valuable place to start before live user/process data is available (incorporating such data should be the ultimate goal).\n", + "\n", + "For more detail see [the docs](https://docs.ragas.io/en/stable/concepts/testset_generation.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# NBVAL_SKIP\n", + "# source: https://docs.ragas.io/en/latest/getstarted/testset_generation.html\n", + "from ragas.testset.generator import TestsetGenerator\n", + "from ragas.testset.evolutions import simple, reasoning, multi_context\n", + "from ragas.run_config import RunConfig\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", + "\n", + "run_config = RunConfig(\n", + " timeout=200,\n", + " max_wait=160,\n", + " max_retries=3,\n", + ")\n", + "\n", + "# generator with openai models\n", + "generator_llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\")\n", + "critic_llm = ChatOpenAI(model=\"gpt-4o-mini\")\n", + "embeddings = OpenAIEmbeddings()\n", + "\n", + "generator = TestsetGenerator.from_langchain(\n", + " generator_llm,\n", + " critic_llm,\n", + " embeddings,\n", + " run_config=run_config,\n", + ")\n", + "\n", + "testset = generator.generate_with_langchain_docs(\n", + " chunks,\n", + " test_size=10,\n", + " distributions={\n", + " simple: 0.5,\n", + " reasoning: 0.25,\n", + " multi_context: 0.25\n", + " },\n", + " run_config=run_config\n", + ")\n", + "\n", + "# save to csv since this can be a time consuming process\n", + "testset.to_pandas().to_csv(\"resources/new_testset.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation helper functions\n", + "\n", + "The following code takes a retrieval chain, a test set dataframe, and the metrics to be evaluated, and returns a dataframe including the calculated metrics."
+ ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from datasets import Dataset\n", + "from ragas import evaluate\n", + "from ragas.run_config import RunConfig\n", + "\n", + "def parse_contexts(source_docs):\n", + "    \"\"\"Collect the page_content of each document in source_docs, keeping their order.\"\"\"\n", + "    texts = []\n", + "    for document in source_docs:\n", + "        texts.append(document.page_content)\n", + "    return texts\n", + "\n", + "def create_evaluation_dataset(chain, testset):\n", + "    \"\"\"Invoke chain on every testset question and gather the results for ragas.\n", + "\n", + "    Every row of testset is read for its question and ground_truth values. The chain is\n", + "    called with {\"input\": question} and its \"answer\" and \"context\" entries are recorded.\n", + "    Returns a datasets.Dataset with question, answer, contexts and ground_truth columns.\n", + "    \"\"\"\n", + "    questions, answers, context_lists, ground_truths = [], [], [], []\n", + "\n", + "    for _, row in testset.iterrows():\n", + "        response = chain.invoke({\"input\": row[\"question\"]})\n", + "        questions.append(row[\"question\"])\n", + "        answers.append(response[\"answer\"])\n", + "\n", + "        retrieved = parse_contexts(response[\"context\"])\n", + "        # an empty retrieval is reported, but the row is still recorded\n", + "        if len(retrieved) == 0:\n", + "            print(f\"no contexts found for question: {row['question']}\")\n", + "\n", + "        context_lists.append(retrieved)\n", + "        ground_truths.append(str(row[\"ground_truth\"]))\n", + "\n", + "    return Dataset.from_dict({\n", + "        \"question\": questions,\n", + "        \"answer\": answers,\n", + "        \"contexts\": context_lists,\n", + "        \"ground_truth\": ground_truths,\n", + "    })\n", + "\n", + "def evaluate_dataset(eval_dataset, metrics, llm, embeddings):\n", + "    \"\"\"Score eval_dataset with the given ragas metrics and return the result as a pandas DataFrame.\"\"\"\n", + "    # see ragas docs for more run_config options\n", + "    single_retry = RunConfig(max_retries=1)\n", + "\n", + "    return evaluate(\n", + "        eval_dataset,\n", + "        metrics=metrics,\n", + "        run_config=single_retry,\n", + "        llm=llm,\n", + "        embeddings=embeddings\n", + "    ).to_pandas()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create the evaluation data\n", + "\n", + "Input: chain to be evaluated and a pregenerated test set
\n", + "Output: dataset formatted for use with ragas evaluation function" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
questioncontextsground_truthevolution_typemetadataepisode_done
0What are short-term investments and how are th...[\"CASH AND EQUIVALENTS Cash and equivalents re...Short-term investments are highly liquid inves...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
1What are some of the risks and uncertainties a...['Our NIKE Direct operations, including our re...Many factors unique to retail operations, some...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
2What is NIKE's policy regarding securities ana...[\"Investors should also be aware that while NI...NIKE's policy is to not disclose any material ...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
3What are the revenues for the Footwear and App...['(Dollars in millions, except per share data)...The revenues for the Footwear and Apparel cate...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
4How do master netting arrangements impact the ...[\"The Company records the assets and liabiliti...The Company records the assets and liabilities...simple[{'source': 'resources/nke-10k-2023.pdf'}]True
\n", + "
" + ], + "text/plain": [ + " question \\\n", + "0 What are short-term investments and how are th... \n", + "1 What are some of the risks and uncertainties a... \n", + "2 What is NIKE's policy regarding securities ana... \n", + "3 What are the revenues for the Footwear and App... \n", + "4 How do master netting arrangements impact the ... \n", + "\n", + " contexts \\\n", + "0 [\"CASH AND EQUIVALENTS Cash and equivalents re... \n", + "1 ['Our NIKE Direct operations, including our re... \n", + "2 [\"Investors should also be aware that while NI... \n", + "3 ['(Dollars in millions, except per share data)... \n", + "4 [\"The Company records the assets and liabiliti... \n", + "\n", + " ground_truth evolution_type \\\n", + "0 Short-term investments are highly liquid inves... simple \n", + "1 Many factors unique to retail operations, some... simple \n", + "2 NIKE's policy is to not disclose any material ... simple \n", + "3 The revenues for the Footwear and Apparel cate... simple \n", + "4 The Company records the assets and liabilities... 
simple \n", + "\n", + " metadata episode_done \n", + "0 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", + "1 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", + "2 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", + "3 [{'source': 'resources/nke-10k-2023.pdf'}] True \n", + "4 [{'source': 'resources/nke-10k-2023.pdf'}] True " + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "testset_df = pd.read_csv(\"resources/testset_15.csv\")\n", + "testset_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "eval_dataset = create_evaluation_dataset(rag_chain, testset_df)\n", + "eval_dataset.to_pandas().shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate generation metrics\n", + "Generation metrics quantify how well the RAG app did at creating answers to the provided questions (i.e. the G in **R**etrieval **A**ugmented **G**eneration). 
We will calculate the generation metrics **faithfulness** and **answer relevancy** for this example.\n", + "\n", + "The ragas library conveniently abstracts the calculation of these metrics so we don't have to write redundant code, but please review the following definitions in order to build intuition around what these metrics actually measure.\n", + "\n", + "Note: the following examples are paraphrased from the [ragas docs](https://docs.ragas.io/en/stable/concepts/metrics/index.html)\n", + "\n", + "------\n", + "\n", + "### Faithfulness\n", + "\n", + "An answer to a question can be said to be \"faithful\" if the **claims** that are made in the answer **can be inferred** from the **context**.\n", + "\n", + "#### Mathematically:\n", + "\n", + "$$\n", + "Faithfulness\\ score = \\frac{Number\\ of\\ claims\\ in\\ the\\ generated\\ answer\\ that\\ can\\ be\\ inferred\\ from\\ the\\ given\\ context}{Total\\ number\\ of\\ claims\\ in\\ the\\ generated\\ answer}\n", + "$$\n", + "\n", + "#### Example process:\n", + "\n", + "> Question: Where and when was Einstein born?\n", + "> \n", + "> Context: Albert Einstein (born 14 March 1879) was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time\n", + ">\n", + "> answer: Einstein was born in Germany on 20th March 1879.\n", + "\n", + "Step 1: Use LLM to break generated answer into individual statements.\n", + "- “Einstein was born in Germany.”\n", + "- “Einstein was born on 20th March 1879.”\n", + "\n", + "Step 2: For each statement use LLM to verify if it can be inferred from the context.\n", + "- “Einstein was born in Germany.” => yes. 
\n", + "- “Einstein was born on 20th March 1879.” => no.\n", + "\n", + "Step 3: plug into formula\n", + "\n", + "Number of claims inferred from context = 1\n", + "Total number of claims = 2\n", + "Faithfulness = 1/2\n", + "\n", + "### Answer Relevance\n", + "\n", + "An answer can be said to be relevant if it directly addresses the question (intuitively).\n", + "\n", + "#### Example process:\n", + "\n", + "1. Use an LLM to generate \"hypothetical\" questions to a given answer with the following prompt:\n", + "\n", + " > Generate a question for the given answer.\n", + " > answer: [answer]\n", + "\n", + "2. Embed the generated \"hypothetical\" questions as vectors.\n", + "3. Calculate the cosine similarity of the hypothetical questions and the original question, sum those similarities, and divide by n.\n", + "\n", + "With data:\n", + "\n", + "> Question: Where is France and what is it’s capital?\n", + "> \n", + "> answer: France is in western Europe.\n", + "\n", + "Step 1 - use LLM to create 'n' variants of question from the generated answer.\n", + "\n", + "- “In which part of Europe is France located?”\n", + "- “What is the geographical location of France within Europe?”\n", + "- “Can you identify the region of Europe where France is situated?”\n", + "\n", + "Step 2 - Calculate the mean cosine similarity between the generated questions and the actual question.\n", + "\n", + "## Now let's implement using our helper functions\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "dd9cabb4b0c448b08cad96d2ef3391a2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Evaluating: 0%| | 0/15 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
faithfulnessanswer_relevancy
count15.00000015.000000
mean0.7812290.938581
std0.3626660.085342
min0.0000000.736997
25%0.6527780.926596
50%1.0000000.975230
75%1.0000000.994168
max1.0000001.000000
\n", + "" + ], + "text/plain": [ + " faithfulness answer_relevancy\n", + "count 15.000000 15.000000\n", + "mean 0.781229 0.938581\n", + "std 0.362666 0.085342\n", + "min 0.000000 0.736997\n", + "25% 0.652778 0.926596\n", + "50% 1.000000 0.975230\n", + "75% 1.000000 0.994168\n", + "max 1.000000 1.000000" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gen_metrics_default = faithfulness_metrics\n", + "gen_metrics_default[\"answer_relevancy\"] = answer_relevancy_metrics[\"answer_relevancy\"]\n", + "\n", + "gen_metrics_default.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluating retrieval metrics\n", + "\n", + "Retrieval metrics quantify how well the system performed at fetching the best possible context for generation. Like before please review the definitions below to understand what happens under-the-hood when we execute the evaluation code. \n", + "\n", + "-----\n", + "\n", + "### Context Relevance\n", + "\n", + "\"The context is considered relevant to the extent that it exclusively contains information that is needed to answer the question.\"\n", + "\n", + "#### Example process:\n", + "\n", + "1. Use the following LLM prompt to extract a subset of sentences necessary to answer the question. The context is defined as the formatted search result from the vector database.\n", + "\n", + " > Please extract relevant sentences from\n", + " > the provided context that can potentially\n", + " > help answer the following `{question}`. If no\n", + " > relevant sentences are found, or if you\n", + " > believe the question cannot be answered\n", + " > from the given context, return the phrase\n", + " > \"Insufficient Information\". While extracting candidate sentences you’re not allowed to make any changes to sentences\n", + " > from given `{context}`.\n", + "\n", + "2. 
Compute the context relevance score = (number of extracted sentences) / (total number of sentences in context)\n", + "\n", + "Moving from the initial paper to the active evaluation library ragas there are a few more insightful metrics to evaluate. From the library [source](https://docs.ragas.io/en/stable/concepts/metrics/index.html) let's introduce `context precision` and `context recall`. \n", + "\n", + "### Context recall\n", + "Context can be said to have high recall if retrieved context aligns with the ground truth answer.\n", + "\n", + "#### Mathematically:\n", + "\n", + "$$\n", + "Context\\ recall = \\frac{Ground\\ Truth\\ sentences\\ that\\ can\\ be\\ attributed\\ to\\ context}{Total\\ number\\ of\\ sentences\\ in\\ the\\ ground\\ truth}\n", + "$$\n", + "\n", + "#### Example process:\n", + "\n", + "Data:\n", + "> question: Where is France and what is its capital?\n", + "> ground truth answer: France is in Western Europe and its capital is Paris.\n", + "> context: France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. The country is also renowned for its wines and sophisticated cuisine. Lascaux’s ancient cave drawings, Lyon’s Roman theater and the vast Palace of Versailles attest to its rich history.\n", + ">\n", + "> Note: ground truth answer can be created by critic LLM or with own human labeled data set.\n", + "\n", + "Step 1 - use an LLM to break the ground truth down into individual statements:\n", + "- `France is in Western Europe`\n", + "- `Its capital is Paris`\n", + "\n", + "Step 2 - for each ground truth statement, use an LLM to determine if it can be attributed to the context.\n", + "- `France is in Western Europe` => yes\n", + "- `Its capital is Paris` => no\n", + "\n", + "\n", + "Step 3 - plug in to formula\n", + "\n", + "context recall = (1 + 0) / 2 = 0.5\n", + "\n", + "### Context precision\n", + "\n", + "This metric relates to how chunks are ranked in a response. 
Ideally the most relevant chunks are at the top.\n", + "\n", + "#### Mathematically:\n", + "\n", + "$$\n", + "Context\\ Precision@K = \\frac{\\sum_{k=1}^{K} (Precision@k \\times v_k)}{total\\ number\\ of\\ relevant\\ items\\ in\\ the\\ top\\ K\\ results}\n", + "$$\n", + "\n", + "where $K$ is the number of retrieved chunks and $v_k$ is 1 if the chunk at rank $k$ is relevant and 0 otherwise.\n", + "\n", + "$$\n", + "Precision@k = \\frac{true\\ positives@k}{true\\ positives@k + false\\ positives@k}\n", + "$$\n", + "\n", + "#### Example process:\n", + "\n", + "Data:\n", + "> Question: Where is France and what is its capital?\n", + "> \n", + "> Ground truth: France is in Western Europe and its capital is Paris.\n", + "> \n", + "> Context: [ “The country is also renowned for its wines and sophisticated cuisine. Lascaux’s ancient cave drawings, Lyon’s Roman theater and”, “France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. Paris, its capital, is famed for its fashion houses, classical art museums including the Louvre and monuments like the Eiffel Tower”]\n", + "\n", + "Step 1 - for each chunk use the LLM to check if it's relevant or not to the ground truth answer.\n", + "\n", + "Step 2 - for each chunk in the context calculate the precision defined as: `true positives@k / (true positives@k + false positives@k)`\n", + "- `“The country is also renowned for its wines and sophisticated cuisine. Lascaux’s ancient cave drawings, Lyon’s Roman theater and”` => precision = 0/1 or 0.\n", + "- `“France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. Paris, its capital, is famed for its fashion houses, classical art museums including the Louvre and monuments like the Eiffel Tower”` => the precision would be (1) / (1 true positive + 1 false positive) = 0.5. 
\n", + "\n", + "\n", + "Step 3 - calculate the overall context precision = (0 + 0.5) / 1 = 0.5" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c076c3dc42cf49cf8d768dec225727d5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Evaluating: 0%| | 0/15 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
context_recallcontext_precision
count15.00000015.000000
mean0.9666670.925926
std0.1290990.145352
min0.5000000.500000
25%1.0000000.916667
50%1.0000001.000000
75%1.0000001.000000
max1.0000001.000000
\n", + "" + ], + "text/plain": [ + " context_recall context_precision\n", + "count 15.000000 15.000000\n", + "mean 0.966667 0.925926\n", + "std 0.129099 0.145352\n", + "min 0.500000 0.500000\n", + "25% 1.000000 0.916667\n", + "50% 1.000000 1.000000\n", + "75% 1.000000 1.000000\n", + "max 1.000000 1.000000" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret_metrics_default = context_recall_metrics\n", + "ret_metrics_default[\"context_precision\"] = context_precision_metrics[\"context_precision\"]\n", + "\n", + "ret_metrics_default.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = ret_metrics_default\n", + "metrics[\"faithfulness\"] = gen_metrics_default[\"faithfulness\"]\n", + "metrics[\"answer_relevancy\"] = gen_metrics_default[\"answer_relevancy\"]\n", + "\n", + "metrics.to_csv(f\"resources/metrics_{CHUNK_SIZE}_{CHUNK_OVERLAP}.csv\", index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# All together" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
context_recallcontext_precisionfaithfulnessanswer_relevancy
count15.00000015.00000015.00000015.000000
mean0.9666670.9259260.7812290.938581
std0.1290990.1453520.3626660.085342
min0.5000000.5000000.0000000.736997
25%1.0000000.9166670.6527780.926596
50%1.0000001.0000001.0000000.975230
75%1.0000001.0000001.0000000.994168
max1.0000001.0000001.0000001.000000
\n", + "
" + ], + "text/plain": [ + " context_recall context_precision faithfulness answer_relevancy\n", + "count 15.000000 15.000000 15.000000 15.000000\n", + "mean 0.966667 0.925926 0.781229 0.938581\n", + "std 0.129099 0.145352 0.362666 0.085342\n", + "min 0.500000 0.500000 0.000000 0.736997\n", + "25% 1.000000 0.916667 0.652778 0.926596\n", + "50% 1.000000 1.000000 1.000000 0.975230\n", + "75% 1.000000 1.000000 1.000000 0.994168\n", + "max 1.000000 1.000000 1.000000 1.000000" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis\n", + "Overall our RAG app showed pretty good performance. All values indicated above 0.6, which from anecdotal experience, is a reasonable lower-bound for performance however obviously higher values are more ideal. It is worth noting that generation metrics can be a bit more hazy in terms of ideal ranges since the LLM evaluation cannot yet capture the way a response feels to a user. 
For these metrics it's important to make sure they are not severely low however blind optimization to the top can result in a very uncreative chat experience which may or may not be ideal for the intended use case.\n", + "\n", + "## Review\n", + "\n", + "- we initialized our RAG app with data from a 10k document\n", + "- generated a testset to evaluate \n", + "- calculated both retrieval and generation metrics\n", + "\n", + "## Next steps\n", + "\n", + "Now that we know how to measure our system we can quickly and easily experiment with different techniques with a baseline in place to improve our systems.\n", + "\n", + "## Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.index import SearchIndex\n", + "\n", + "idx = SearchIndex.from_existing(\n", + " index_name,\n", + " redis_url=REDIS_URL\n", + ")\n", + "\n", + "idx.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/python-recipes/RAG/07_user_role_based_rag.ipynb b/python-recipes/RAG/07_user_role_based_rag.ipynb index 34a2ee6e..f7b4466e 100644 --- a/python-recipes/RAG/07_user_role_based_rag.ipynb +++ b/python-recipes/RAG/07_user_role_based_rag.ipynb @@ -60,7 +60,7 @@ } ], "source": [ - "%pip install -q 'redisvl>=0.3.8' openai langchain-community pypdf" + "%pip install -q \"redisvl>=0.4.1\" openai langchain-community pypdf" ] }, { @@ -1780,7 +1780,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.10" + "version": "3.11.9" } }, "nbformat": 4, diff --git 
a/python-recipes/agents/00_langgraph_redis_agentic_rag.ipynb b/python-recipes/agents/00_langgraph_redis_agentic_rag.ipynb index 8bc01444..f00a37ef 100644 --- a/python-recipes/agents/00_langgraph_redis_agentic_rag.ipynb +++ b/python-recipes/agents/00_langgraph_redis_agentic_rag.ipynb @@ -1,674 +1,674 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "1VK8QKOVG2Ek", - "metadata": { - "id": "1VK8QKOVG2Ek" - }, - "source": [ - "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", - "\n", - "# Agentic RAG with LangGraph and Redis\n", - "\n", - "\"Open\n", - "\n", - "This notebook demonstrates the implementation of a Retrieval Agent using LangGraph and LangChain components. It showcases a flexible question-answering system that combines document retrieval with language model generation. The system uses an LLM with access to a retriever tool, making decisions about when to retrieve information from an index. Redis is utilized as a vector store for efficient document retrieval and embedding storage. Key features include adaptive query rewriting, document relevance assessment, and multi-step processing. 
The notebook illustrates how LangGraph can be used to create a sophisticated workflow for handling complex queries, integrating retrieval, reasoning, and generation capabilities in a single system.\n", - "\n", - "[Retrieval Agents](https://python.langchain.com/docs/tutorials/qa_chat_history/#agents) are useful when we want to make decisions about whether to retrieve from an index.\n", - "\n", - "To implement a retrieval agent, we simply need to give an LLM access to a retriever tool.\n", - "\n", - "We can incorporate this into [LangGraph](https://langchain-ai.github.io/langgraph/).\n", - "\n", - "![agentic_rag.png]()" - ] - }, - { - "cell_type": "markdown", - "id": "425fb020-e864-40ce-a31f-8da40c73d14b", - "metadata": { - "id": "425fb020-e864-40ce-a31f-8da40c73d14b" - }, - "source": [ - "## Setup\n", - "\n", - "First, let's download the required packages and set our API keys:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "969fb438", - "metadata": { - "id": "969fb438" - }, - "outputs": [], - "source": [ - "%pip install -q langchain-community tiktoken langchain-openai langchainhub langchain-redis langchain langgraph langchain-text-splitters bs4" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e4958a8c", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cells": [ + { + "cell_type": "markdown", + "id": "1VK8QKOVG2Ek", + "metadata": { + "id": "1VK8QKOVG2Ek" + }, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# Agentic RAG with LangGraph and Redis\n", + "\n", + "\"Open\n", + "\n", + "This notebook demonstrates the implementation of a Retrieval Agent using LangGraph and LangChain components. It showcases a flexible question-answering system that combines document retrieval with language model generation. 
The system uses an LLM with access to a retriever tool, making decisions about when to retrieve information from an index. Redis is utilized as a vector store for efficient document retrieval and embedding storage. Key features include adaptive query rewriting, document relevance assessment, and multi-step processing. The notebook illustrates how LangGraph can be used to create a sophisticated workflow for handling complex queries, integrating retrieval, reasoning, and generation capabilities in a single system.\n", + "\n", + "[Retrieval Agents](https://python.langchain.com/docs/tutorials/qa_chat_history/#agents) are useful when we want to make decisions about whether to retrieve from an index.\n", + "\n", + "To implement a retrieval agent, we simply need to give an LLM access to a retriever tool.\n", + "\n", + "We can incorporate this into [LangGraph](https://langchain-ai.github.io/langgraph/).\n", + "\n", + "![agentic_rag.png]()" + ] + }, + { + "cell_type": "markdown", + "id": "425fb020-e864-40ce-a31f-8da40c73d14b", + "metadata": { + "id": "425fb020-e864-40ce-a31f-8da40c73d14b" + }, + "source": [ + "## Setup\n", + "\n", + "First, let's download the required packages and set our API keys:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "969fb438", + "metadata": { + "id": "969fb438" + }, + "outputs": [], + "source": [ + "%pip install -q langchain-community tiktoken langchain-openai langchainhub \"langchain-redis>=0.2.0\" langchain langgraph langchain-text-splitters bs4" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e4958a8c", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e4958a8c", + "outputId": "276c5d89-a4d7-4c79-d307-b619a5489830" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OPENAI_API_KEY:··········\n" + ] + } + ], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "\n", + "def _set_env(key: str):\n", + " if key not in 
os.environ:\n", + " os.environ[key] = getpass.getpass(f\"{key}:\")\n", + "\n", + "\n", + "_set_env(\"OPENAI_API_KEY\")" + ] + }, + { + "cell_type": "markdown", + "id": "Po4K08Uoa5HJ", + "metadata": { + "id": "Po4K08Uoa5HJ" + }, + "source": [ + "### Setup Redis" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "VLy0onoAa7KI", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VLy0onoAa7KI", + "outputId": "b346e76e-e87d-437f-c9fa-78647db77f4e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb jammy main\n", + "Starting redis-stack-server, database path /var/lib/redis-stack\n" + ] + } + ], + "source": [ + "# NBVAL_SKIP\n", + "%%sh\n", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", + "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", + "sudo apt-get update > /dev/null 2>&1\n", + "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", + "redis-stack-server --daemonize yes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7c2KKPhOh4zM", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7c2KKPhOh4zM", + "outputId": "0e314576-b34e-4881-ddf0-80d686810091" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connecting to Redis at: redis://localhost:6379\n" + ] + } + ], + "source": [ + "# Use the environment variable if set, otherwise default to localhost\n", + "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", + "print(f\"Connecting to Redis at: {REDIS_URL}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c74e4532", + "metadata": { + "id": "c74e4532" + }, + "source": [ + "## Retriever\n", + 
"\n", + "First, we index 3 blog posts. For this we setup a retriever using Redis as a vector store." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e50c9efe-4abe-42fa-b35a-05eeeede9ec6", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e50c9efe-4abe-42fa-b35a-05eeeede9ec6", + "outputId": "f3ab6120-eb1e-4de8-dcc6-0abb7fe9201b" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:langchain_community.utils.user_agent:USER_AGENT environment variable not set, consider setting it to identify your requests.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18:31:28 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "18:31:28 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "18:31:30 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import WebBaseLoader\n", + "\n", + "from langchain_redis import RedisVectorStore\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "urls = [\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", + " \"https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/\",\n", + " \"https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/\",\n", + "]\n", + "\n", + "docs = [WebBaseLoader(url).load() for url in urls]\n", + "docs_list = [item for sublist in docs for item in sublist]\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n", + " chunk_size=100, chunk_overlap=50\n", + ")\n", + "doc_splits = text_splitter.split_documents(docs_list)\n", + "\n", + "# Add to document chunks to Redis\n", + "vectorstore = RedisVectorStore.from_documents(\n", + " doc_splits,\n", + " 
OpenAIEmbeddings(),\n", + " redis_url=REDIS_URL,\n", + " index_name=\"rag-redis\"\n", + ")\n", + "# get RedisVectorStore as a retriever\n", + "retriever = vectorstore.as_retriever()" + ] + }, + { + "cell_type": "markdown", + "id": "225d2277-45b2-4ae8-a7d6-62b07fb4a002", + "metadata": { + "id": "225d2277-45b2-4ae8-a7d6-62b07fb4a002" + }, + "source": [ + "Then we create a retriever tool." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0b97bdd8-d7e3-444d-ac96-5ef4725f9048", + "metadata": { + "id": "0b97bdd8-d7e3-444d-ac96-5ef4725f9048" + }, + "outputs": [], + "source": [ + "from langchain.tools.retriever import create_retriever_tool\n", + "\n", + "retriever_tool = create_retriever_tool(\n", + " retriever,\n", + " \"retrieve_blog_posts\",\n", + " \"Search and return information about Lilian Weng blog posts on LLM agents, prompt engineering, and adversarial attacks on LLMs.\",\n", + ")\n", + "\n", + "tools = [retriever_tool]" + ] + }, + { + "cell_type": "markdown", + "id": "fe6e8f78-1ef7-42ad-b2bf-835ed5850553", + "metadata": { + "id": "fe6e8f78-1ef7-42ad-b2bf-835ed5850553" + }, + "source": [ + "## Agent State\n", + "\n", + "We will define a graph.\n", + "\n", + "A `state` object that it passes around to each node.\n", + "\n", + "Our state will be a list of `messages`.\n", + "\n", + "Each node in our graph will append to it." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0e378706-47d5-425a-8ba0-57b9acffbd0c", + "metadata": { + "id": "0e378706-47d5-425a-8ba0-57b9acffbd0c" + }, + "outputs": [], + "source": [ + "from typing import Annotated, Sequence, TypedDict\n", + "\n", + "from langchain_core.messages import BaseMessage\n", + "\n", + "from langgraph.graph.message import add_messages\n", + "\n", + "\n", + "class AgentState(TypedDict):\n", + " # The add_messages function defines how an update should be processed\n", + " # Default is to replace. 
add_messages says \"append\"\n", + " messages: Annotated[Sequence[BaseMessage], add_messages]" + ] + }, + { + "cell_type": "markdown", + "id": "dc949d42-8a34-4231-bff0-b8198975e2ce", + "metadata": { + "id": "dc949d42-8a34-4231-bff0-b8198975e2ce" + }, + "source": [ + "## Nodes and Edges\n", + "\n", + "We can lay out an agentic RAG graph like this:\n", + "\n", + "* The state is a set of messages\n", + "* Each node will update (append to) state\n", + "* Conditional edges decide which node to visit next\n", + "\n", + "![langgraph.png]()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "278d1d83-dda6-4de4-bf8b-be9965c227fa", + "metadata": { + "id": "278d1d83-dda6-4de4-bf8b-be9965c227fa" + }, + "outputs": [], + "source": [ + "from typing import Annotated, Literal, Sequence, TypedDict\n", + "\n", + "from langchain_core.messages import BaseMessage, HumanMessage\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import PromptTemplate, ChatPromptTemplate\n", + "from langchain_openai import ChatOpenAI\n", + "# NOTE: you must use langchain-core >= 0.3 with Pydantic v2\n", + "from pydantic import BaseModel, Field\n", + "\n", + "\n", + "from langgraph.prebuilt import tools_condition\n", + "\n", + "### Edges\n", + "\n", + "\n", + "def grade_documents(state) -> Literal[\"generate\", \"rewrite\"]:\n", + " \"\"\"\n", + " Determines whether the retrieved documents are relevant to the question.\n", + "\n", + " Args:\n", + " state (messages): The current state\n", + "\n", + " Returns:\n", + " str: A decision for whether the documents are relevant or not\n", + " \"\"\"\n", + "\n", + " print(\"---CHECK RELEVANCE---\")\n", + "\n", + " # Data model\n", + " class grade(BaseModel):\n", + " \"\"\"Binary score for relevance check.\"\"\"\n", + "\n", + " binary_score: str = Field(description=\"Relevance score 'yes' or 'no'\")\n", + "\n", + " # LLM\n", + " model = ChatOpenAI(temperature=0, model=\"gpt-4-0125-preview\", 
streaming=True)\n", + "\n", + " # LLM with tool and validation\n", + " llm_with_tool = model.with_structured_output(grade)\n", + "\n", + " # Prompt\n", + " prompt = PromptTemplate(\n", + " template=\"\"\"You are a grader assessing relevance of a retrieved document to a user question. \\n\n", + " Here is the retrieved document: \\n\\n {context} \\n\\n\n", + " Here is the user question: {question} \\n\n", + " If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \\n\n", + " Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.\"\"\",\n", + " input_variables=[\"context\", \"question\"],\n", + " )\n", + "\n", + " # Chain\n", + " chain = prompt | llm_with_tool\n", + "\n", + " messages = state[\"messages\"]\n", + " last_message = messages[-1]\n", + "\n", + " question = messages[0].content\n", + " docs = last_message.content\n", + "\n", + " scored_result = chain.invoke({\"question\": question, \"context\": docs})\n", + "\n", + " score = scored_result.binary_score\n", + "\n", + " if score == \"yes\":\n", + " print(\"---DECISION: DOCS RELEVANT---\")\n", + " return \"generate\"\n", + "\n", + " else:\n", + " print(\"---DECISION: DOCS NOT RELEVANT---\")\n", + " print(score)\n", + " return \"rewrite\"\n", + "\n", + "\n", + "### Nodes\n", + "\n", + "\n", + "def agent(state):\n", + " \"\"\"\n", + " Invokes the agent model to generate a response based on the current state. 
Given\n", + " the question, it will decide to retrieve using the retriever tool, or simply end.\n", + "\n", + " Args:\n", + " state (messages): The current state\n", + "\n", + " Returns:\n", + " dict: The updated state with the agent response appended to messages\n", + " \"\"\"\n", + " print(\"---CALL AGENT---\")\n", + " messages = state[\"messages\"]\n", + " model = ChatOpenAI(temperature=0, streaming=True, model=\"gpt-4-turbo\")\n", + " model = model.bind_tools(tools)\n", + " response = model.invoke(messages)\n", + " # We return a list, because this will get added to the existing list\n", + " return {\"messages\": [response]}\n", + "\n", + "\n", + "def rewrite(state):\n", + " \"\"\"\n", + " Transform the query to produce a better question.\n", + "\n", + " Args:\n", + " state (messages): The current state\n", + "\n", + " Returns:\n", + " dict: The updated state with re-phrased question\n", + " \"\"\"\n", + "\n", + " print(\"---TRANSFORM QUERY---\")\n", + " messages = state[\"messages\"]\n", + " question = messages[0].content\n", + "\n", + " msg = [\n", + " HumanMessage(\n", + " content=f\"\"\" \\n\n", + " Look at the input and try to reason about the underlying semantic intent / meaning. 
\\n\n", + " Here is the initial question:\n", + " \\n ------- \\n\n", + " {question}\n", + " \\n ------- \\n\n", + " Formulate an improved question: \"\"\",\n", + " )\n", + " ]\n", + "\n", + " # Grader\n", + " model = ChatOpenAI(temperature=0, model=\"gpt-4-0125-preview\", streaming=True)\n", + " response = model.invoke(msg)\n", + " return {\"messages\": [response]}\n", + "\n", + "\n", + "def generate(state):\n", + " \"\"\"\n", + " Generate answer\n", + "\n", + " Args:\n", + " state (messages): The current state\n", + "\n", + " Returns:\n", + " dict: The updated state with re-phrased question\n", + " \"\"\"\n", + " print(\"---GENERATE---\")\n", + " messages = state[\"messages\"]\n", + " question = messages[0].content\n", + " last_message = messages[-1]\n", + "\n", + " docs = last_message.content\n", + "\n", + " # Prompt\n", + " prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.\",\n", + " ),\n", + " (\"system\", \"Context: {context}\"),\n", + " (\"human\", \"Question: {question} \"),\n", + " ]\n", + " )\n", + "\n", + " # LLM\n", + " llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0, streaming=True)\n", + "\n", + " # Chain\n", + " rag_chain = prompt | llm | StrOutputParser()\n", + "\n", + " # Run\n", + " response = rag_chain.invoke({\"context\": docs, \"question\": question})\n", + " return {\"messages\": [response]}" + ] + }, + { + "cell_type": "markdown", + "id": "955882ef-7467-48db-ae51-de441f2fc3a7", + "metadata": { + "id": "955882ef-7467-48db-ae51-de441f2fc3a7" + }, + "source": [ + "## Graph\n", + "\n", + "* Start with an agent, `call_model`\n", + "* Agent make a decision to call a function\n", + "* If so, then `action` to call tool (retriever)\n", + "* Then call agent with the tool output added to messages (`state`)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8718a37f-83c2-4f16-9850-e61e0f49c3d4", + "metadata": { + "id": "8718a37f-83c2-4f16-9850-e61e0f49c3d4" + }, + "outputs": [], + "source": [ + "from langgraph.graph import END, StateGraph, START\n", + "from langgraph.prebuilt import ToolNode\n", + "\n", + "# Define a new graph\n", + "workflow = StateGraph(AgentState)\n", + "\n", + "# Define the nodes we will cycle between\n", + "workflow.add_node(\"agent\", agent) # agent\n", + "retrieve = ToolNode([retriever_tool])\n", + "workflow.add_node(\"retrieve\", retrieve) # retrieval\n", + "workflow.add_node(\"rewrite\", rewrite) # Re-writing the question\n", + "workflow.add_node(\n", + " \"generate\", generate\n", + ") # Generating a response after we know the documents are relevant\n", + "# Call agent node to decide to retrieve or not\n", + "workflow.add_edge(START, \"agent\")\n", + "\n", + "# Decide whether to retrieve\n", + "workflow.add_conditional_edges(\n", + " \"agent\",\n", + " # Assess agent decision\n", + " tools_condition,\n", + " 
{\n", + " # Translate the condition outputs to nodes in our graph\n", + " \"tools\": \"retrieve\",\n", + " END: END,\n", + " },\n", + ")\n", + "\n", + "# Edges taken after the `action` node is called.\n", + "workflow.add_conditional_edges(\n", + " \"retrieve\",\n", + " # Assess agent decision\n", + " grade_documents,\n", + ")\n", + "workflow.add_edge(\"generate\", END)\n", + "workflow.add_edge(\"rewrite\", \"agent\")\n", + "\n", + "# Compile\n", + "graph = workflow.compile()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7b5a1d35", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 473 + }, + "id": "7b5a1d35", + "outputId": "7b95dcbe-5a26-42b5-9708-8a1020564622" + }, + "outputs": [ + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import Image, display\n", + "\n", + "try:\n", + " display(Image(graph.get_graph(xray=True).draw_mermaid_png()))\n", + "except Exception:\n", + " # This requires some extra dependencies and is optional\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "7649f05a-cb67-490d-b24a-74d41895139a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7649f05a-cb67-490d-b24a-74d41895139a", + "outputId": "5ab8e289-5dc3-4285-ec5a-574c7ccec01e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---CALL AGENT---\n", + "18:32:46 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\"Output from node 'agent':\"\n", + "'---'\n", + "{ 'messages': [ AIMessage(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_sDky13ZhyfzMmoNr0vO79i9n', 'function': {'arguments': '{\"query\":\"types of agent memory\"}', 'name': 'retrieve_blog_posts'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 
'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5db30363ff'}, id='run-bda3e47f-d5a6-44a8-9dd2-f4f51b0f6627-0', tool_calls=[{'name': 'retrieve_blog_posts', 'args': {'query': 'types of agent memory'}, 'id': 'call_sDky13ZhyfzMmoNr0vO79i9n', 'type': 'tool_call'}])]}\n", + "'\\n---\\n'\n", + "18:32:47 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "---CHECK RELEVANCE---\n", + "18:32:49 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "---DECISION: DOCS RELEVANT---\n", + "\"Output from node 'retrieve':\"\n", + "'---'\n", + "{ 'messages': [ ToolMessage(content='Table of Contents\\n\\n\\n\\nAgent System Overview\\n\\nComponent One: Planning\\n\\nTask Decomposition\\n\\nSelf-Reflection\\n\\n\\nComponent Two: Memory\\n\\nTypes of Memory\\n\\nMaximum Inner Product Search (MIPS)\\n\\n\\nComponent Three: Tool Use\\n\\nCase Studies\\n\\nScientific Discovery Agent\\n\\nGenerative Agents Simulation\\n\\nProof-of-Concept Examples\\n\\n\\nChallenges\\n\\nCitation\\n\\nReferences\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to 
interact with other agents.', name='retrieve_blog_posts', id='c7b3f250-b7c2-43a3-a852-8c2603f10fc0', tool_call_id='call_sDky13ZhyfzMmoNr0vO79i9n')]}\n", + "'\\n---\\n'\n", + "---GENERATE---\n", + "18:32:50 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "\"Output from node 'generate':\"\n", + "'---'\n", + "{ 'messages': [ 'Lilian Weng discusses short-term memory as utilizing '\n", + " 'in-context learning for the model to learn and long-term '\n", + " 'memory as enabling the agent to retain and recall information '\n", + " 'over extended periods by leveraging an external vector store '\n", + " 'for fast retrieval.']}\n", + "'\\n---\\n'\n" + ] + } + ], + "source": [ + "import pprint\n", + "\n", + "inputs = {\n", + " \"messages\": [\n", + " (\"user\", \"What does Lilian Weng say about the types of agent memory?\"),\n", + " ]\n", + "}\n", + "for output in graph.stream(inputs):\n", + " for key, value in output.items():\n", + " pprint.pprint(f\"Output from node '{key}':\")\n", + " pprint.pprint(\"---\")\n", + " pprint.pprint(value, indent=2, width=80, depth=None)\n", + " pprint.pprint(\"\\n---\\n\")" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } }, - "id": "e4958a8c", - "outputId": "276c5d89-a4d7-4c79-d307-b619a5489830" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OPENAI_API_KEY:··········\n" - ] - } - ], - "source": [ - "import getpass\n", - "import os\n", - "\n", - "\n", - "def _set_env(key: str):\n", - " if key not in os.environ:\n", - " os.environ[key] = getpass.getpass(f\"{key}:\")\n", - "\n", - "\n", - 
"_set_env(\"OPENAI_API_KEY\")" - ] - }, - { - "cell_type": "markdown", - "id": "Po4K08Uoa5HJ", - "metadata": { - "id": "Po4K08Uoa5HJ" - }, - "source": [ - "### Setup Redis" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "VLy0onoAa7KI", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VLy0onoAa7KI", - "outputId": "b346e76e-e87d-437f-c9fa-78647db77f4e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb jammy main\n", - "Starting redis-stack-server, database path /var/lib/redis-stack\n" - ] - } - ], - "source": [ - "# NBVAL_SKIP\n", - "%%sh\n", - "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", - "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", - "sudo apt-get update > /dev/null 2>&1\n", - "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", - "redis-stack-server --daemonize yes" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "7c2KKPhOh4zM", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7c2KKPhOh4zM", - "outputId": "0e314576-b34e-4881-ddf0-80d686810091" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connecting to Redis at: redis://localhost:6379\n" - ] - } - ], - "source": [ - "# Use the environment variable if set, otherwise default to localhost\n", - "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", - "print(f\"Connecting to Redis at: {REDIS_URL}\")" - ] - }, - { - "cell_type": "markdown", - "id": "c74e4532", - "metadata": { - "id": "c74e4532" - }, - "source": [ - "## Retriever\n", - "\n", - "First, we index 3 blog posts. 
For this we setup a retriever using Redis as a vector store." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "e50c9efe-4abe-42fa-b35a-05eeeede9ec6", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "e50c9efe-4abe-42fa-b35a-05eeeede9ec6", - "outputId": "f3ab6120-eb1e-4de8-dcc6-0abb7fe9201b" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:langchain_community.utils.user_agent:USER_AGENT environment variable not set, consider setting it to identify your requests.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18:31:28 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", - "18:31:28 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", - "18:31:30 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" - ] - } - ], - "source": [ - "from langchain_community.document_loaders import WebBaseLoader\n", - "\n", - "from langchain_redis import RedisVectorStore\n", - "from langchain_openai import OpenAIEmbeddings\n", - "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", - "\n", - "urls = [\n", - " \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", - " \"https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/\",\n", - " \"https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/\",\n", - "]\n", - "\n", - "docs = [WebBaseLoader(url).load() for url in urls]\n", - "docs_list = [item for sublist in docs for item in sublist]\n", - "\n", - "text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n", - " chunk_size=100, chunk_overlap=50\n", - ")\n", - "doc_splits = text_splitter.split_documents(docs_list)\n", - "\n", - "# Add to document chunks to Redis\n", - "vectorstore = RedisVectorStore.from_documents(\n", - " doc_splits,\n", - " OpenAIEmbeddings(),\n", - " redis_url=REDIS_URL,\n", - " 
index_name=\"rag-redis\"\n", - ")\n", - "# get RedisVectorStore as a retriever\n", - "retriever = vectorstore.as_retriever()" - ] - }, - { - "cell_type": "markdown", - "id": "225d2277-45b2-4ae8-a7d6-62b07fb4a002", - "metadata": { - "id": "225d2277-45b2-4ae8-a7d6-62b07fb4a002" - }, - "source": [ - "Then we create a retriever tool." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0b97bdd8-d7e3-444d-ac96-5ef4725f9048", - "metadata": { - "id": "0b97bdd8-d7e3-444d-ac96-5ef4725f9048" - }, - "outputs": [], - "source": [ - "from langchain.tools.retriever import create_retriever_tool\n", - "\n", - "retriever_tool = create_retriever_tool(\n", - " retriever,\n", - " \"retrieve_blog_posts\",\n", - " \"Search and return information about Lilian Weng blog posts on LLM agents, prompt engineering, and adversarial attacks on LLMs.\",\n", - ")\n", - "\n", - "tools = [retriever_tool]" - ] - }, - { - "cell_type": "markdown", - "id": "fe6e8f78-1ef7-42ad-b2bf-835ed5850553", - "metadata": { - "id": "fe6e8f78-1ef7-42ad-b2bf-835ed5850553" - }, - "source": [ - "## Agent State\n", - "\n", - "We will define a graph.\n", - "\n", - "A `state` object that it passes around to each node.\n", - "\n", - "Our state will be a list of `messages`.\n", - "\n", - "Each node in our graph will append to it." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0e378706-47d5-425a-8ba0-57b9acffbd0c", - "metadata": { - "id": "0e378706-47d5-425a-8ba0-57b9acffbd0c" - }, - "outputs": [], - "source": [ - "from typing import Annotated, Sequence, TypedDict\n", - "\n", - "from langchain_core.messages import BaseMessage\n", - "\n", - "from langgraph.graph.message import add_messages\n", - "\n", - "\n", - "class AgentState(TypedDict):\n", - " # The add_messages function defines how an update should be processed\n", - " # Default is to replace. 
add_messages says \"append\"\n", - " messages: Annotated[Sequence[BaseMessage], add_messages]" - ] - }, - { - "cell_type": "markdown", - "id": "dc949d42-8a34-4231-bff0-b8198975e2ce", - "metadata": { - "id": "dc949d42-8a34-4231-bff0-b8198975e2ce" - }, - "source": [ - "## Nodes and Edges\n", - "\n", - "We can lay out an agentic RAG graph like this:\n", - "\n", - "* The state is a set of messages\n", - "* Each node will update (append to) state\n", - "* Conditional edges decide which node to visit next\n", - "\n", - "![langgraph.png]()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "278d1d83-dda6-4de4-bf8b-be9965c227fa", - "metadata": { - "id": "278d1d83-dda6-4de4-bf8b-be9965c227fa" - }, - "outputs": [], - "source": [ - "from typing import Annotated, Literal, Sequence, TypedDict\n", - "\n", - "from langchain_core.messages import BaseMessage, HumanMessage\n", - "from langchain_core.output_parsers import StrOutputParser\n", - "from langchain_core.prompts import PromptTemplate, ChatPromptTemplate\n", - "from langchain_openai import ChatOpenAI\n", - "# NOTE: you must use langchain-core >= 0.3 with Pydantic v2\n", - "from pydantic import BaseModel, Field\n", - "\n", - "\n", - "from langgraph.prebuilt import tools_condition\n", - "\n", - "### Edges\n", - "\n", - "\n", - "def grade_documents(state) -> Literal[\"generate\", \"rewrite\"]:\n", - " \"\"\"\n", - " Determines whether the retrieved documents are relevant to the question.\n", - "\n", - " Args:\n", - " state (messages): The current state\n", - "\n", - " Returns:\n", - " str: A decision for whether the documents are relevant or not\n", - " \"\"\"\n", - "\n", - " print(\"---CHECK RELEVANCE---\")\n", - "\n", - " # Data model\n", - " class grade(BaseModel):\n", - " \"\"\"Binary score for relevance check.\"\"\"\n", - "\n", - " binary_score: str = Field(description=\"Relevance score 'yes' or 'no'\")\n", - "\n", - " # LLM\n", - " model = ChatOpenAI(temperature=0, model=\"gpt-4-0125-preview\", 
streaming=True)\n", - "\n", - " # LLM with tool and validation\n", - " llm_with_tool = model.with_structured_output(grade)\n", - "\n", - " # Prompt\n", - " prompt = PromptTemplate(\n", - " template=\"\"\"You are a grader assessing relevance of a retrieved document to a user question. \\n\n", - " Here is the retrieved document: \\n\\n {context} \\n\\n\n", - " Here is the user question: {question} \\n\n", - " If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \\n\n", - " Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.\"\"\",\n", - " input_variables=[\"context\", \"question\"],\n", - " )\n", - "\n", - " # Chain\n", - " chain = prompt | llm_with_tool\n", - "\n", - " messages = state[\"messages\"]\n", - " last_message = messages[-1]\n", - "\n", - " question = messages[0].content\n", - " docs = last_message.content\n", - "\n", - " scored_result = chain.invoke({\"question\": question, \"context\": docs})\n", - "\n", - " score = scored_result.binary_score\n", - "\n", - " if score == \"yes\":\n", - " print(\"---DECISION: DOCS RELEVANT---\")\n", - " return \"generate\"\n", - "\n", - " else:\n", - " print(\"---DECISION: DOCS NOT RELEVANT---\")\n", - " print(score)\n", - " return \"rewrite\"\n", - "\n", - "\n", - "### Nodes\n", - "\n", - "\n", - "def agent(state):\n", - " \"\"\"\n", - " Invokes the agent model to generate a response based on the current state. 
Given\n", - " the question, it will decide to retrieve using the retriever tool, or simply end.\n", - "\n", - " Args:\n", - " state (messages): The current state\n", - "\n", - " Returns:\n", - " dict: The updated state with the agent response appended to messages\n", - " \"\"\"\n", - " print(\"---CALL AGENT---\")\n", - " messages = state[\"messages\"]\n", - " model = ChatOpenAI(temperature=0, streaming=True, model=\"gpt-4-turbo\")\n", - " model = model.bind_tools(tools)\n", - " response = model.invoke(messages)\n", - " # We return a list, because this will get added to the existing list\n", - " return {\"messages\": [response]}\n", - "\n", - "\n", - "def rewrite(state):\n", - " \"\"\"\n", - " Transform the query to produce a better question.\n", - "\n", - " Args:\n", - " state (messages): The current state\n", - "\n", - " Returns:\n", - " dict: The updated state with re-phrased question\n", - " \"\"\"\n", - "\n", - " print(\"---TRANSFORM QUERY---\")\n", - " messages = state[\"messages\"]\n", - " question = messages[0].content\n", - "\n", - " msg = [\n", - " HumanMessage(\n", - " content=f\"\"\" \\n\n", - " Look at the input and try to reason about the underlying semantic intent / meaning. 
\\n\n", - " Here is the initial question:\n", - " \\n ------- \\n\n", - " {question}\n", - " \\n ------- \\n\n", - " Formulate an improved question: \"\"\",\n", - " )\n", - " ]\n", - "\n", - " # Grader\n", - " model = ChatOpenAI(temperature=0, model=\"gpt-4-0125-preview\", streaming=True)\n", - " response = model.invoke(msg)\n", - " return {\"messages\": [response]}\n", - "\n", - "\n", - "def generate(state):\n", - " \"\"\"\n", - " Generate answer\n", - "\n", - " Args:\n", - " state (messages): The current state\n", - "\n", - " Returns:\n", - " dict: The updated state with re-phrased question\n", - " \"\"\"\n", - " print(\"---GENERATE---\")\n", - " messages = state[\"messages\"]\n", - " question = messages[0].content\n", - " last_message = messages[-1]\n", - "\n", - " docs = last_message.content\n", - "\n", - " # Prompt\n", - " prompt = ChatPromptTemplate.from_messages(\n", - " [\n", - " (\n", - " \"system\",\n", - " \"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.\",\n", - " ),\n", - " (\"system\", \"Context: {context}\"),\n", - " (\"human\", \"Question: {question} \"),\n", - " ]\n", - " )\n", - "\n", - " # LLM\n", - " llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0, streaming=True)\n", - "\n", - " # Chain\n", - " rag_chain = prompt | llm | StrOutputParser()\n", - "\n", - " # Run\n", - " response = rag_chain.invoke({\"context\": docs, \"question\": question})\n", - " return {\"messages\": [response]}" - ] - }, - { - "cell_type": "markdown", - "id": "955882ef-7467-48db-ae51-de441f2fc3a7", - "metadata": { - "id": "955882ef-7467-48db-ae51-de441f2fc3a7" - }, - "source": [ - "## Graph\n", - "\n", - "* Start with an agent, `call_model`\n", - "* Agent make a decision to call a function\n", - "* If so, then `action` to call tool (retriever)\n", - "* Then call agent with the tool output added to messages (`state`)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8718a37f-83c2-4f16-9850-e61e0f49c3d4", - "metadata": { - "id": "8718a37f-83c2-4f16-9850-e61e0f49c3d4" - }, - "outputs": [], - "source": [ - "from langgraph.graph import END, StateGraph, START\n", - "from langgraph.prebuilt import ToolNode\n", - "\n", - "# Define a new graph\n", - "workflow = StateGraph(AgentState)\n", - "\n", - "# Define the nodes we will cycle between\n", - "workflow.add_node(\"agent\", agent) # agent\n", - "retrieve = ToolNode([retriever_tool])\n", - "workflow.add_node(\"retrieve\", retrieve) # retrieval\n", - "workflow.add_node(\"rewrite\", rewrite) # Re-writing the question\n", - "workflow.add_node(\n", - " \"generate\", generate\n", - ") # Generating a response after we know the documents are relevant\n", - "# Call agent node to decide to retrieve or not\n", - "workflow.add_edge(START, \"agent\")\n", - "\n", - "# Decide whether to retrieve\n", - "workflow.add_conditional_edges(\n", - " \"agent\",\n", - " # Assess agent decision\n", - " tools_condition,\n", - " 
{\n", - " # Translate the condition outputs to nodes in our graph\n", - " \"tools\": \"retrieve\",\n", - " END: END,\n", - " },\n", - ")\n", - "\n", - "# Edges taken after the `action` node is called.\n", - "workflow.add_conditional_edges(\n", - " \"retrieve\",\n", - " # Assess agent decision\n", - " grade_documents,\n", - ")\n", - "workflow.add_edge(\"generate\", END)\n", - "workflow.add_edge(\"rewrite\", \"agent\")\n", - "\n", - "# Compile\n", - "graph = workflow.compile()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "7b5a1d35", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 473 - }, - "id": "7b5a1d35", - "outputId": "7b95dcbe-5a26-42b5-9708-8a1020564622" - }, - "outputs": [ - { - "data": { - "image/jpeg": "", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from IPython.display import Image, display\n", - "\n", - "try:\n", - " display(Image(graph.get_graph(xray=True).draw_mermaid_png()))\n", - "except Exception:\n", - " # This requires some extra dependencies and is optional\n", - " pass" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "7649f05a-cb67-490d-b24a-74d41895139a", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7649f05a-cb67-490d-b24a-74d41895139a", - "outputId": "5ab8e289-5dc3-4285-ec5a-574c7ccec01e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---CALL AGENT---\n", - "18:32:46 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "\"Output from node 'agent':\"\n", - "'---'\n", - "{ 'messages': [ AIMessage(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_sDky13ZhyfzMmoNr0vO79i9n', 'function': {'arguments': '{\"query\":\"types of agent memory\"}', 'name': 'retrieve_blog_posts'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 
'gpt-4-turbo-2024-04-09', 'system_fingerprint': 'fp_5db30363ff'}, id='run-bda3e47f-d5a6-44a8-9dd2-f4f51b0f6627-0', tool_calls=[{'name': 'retrieve_blog_posts', 'args': {'query': 'types of agent memory'}, 'id': 'call_sDky13ZhyfzMmoNr0vO79i9n', 'type': 'tool_call'}])]}\n", - "'\\n---\\n'\n", - "18:32:47 httpx INFO HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", - "---CHECK RELEVANCE---\n", - "18:32:49 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "---DECISION: DOCS RELEVANT---\n", - "\"Output from node 'retrieve':\"\n", - "'---'\n", - "{ 'messages': [ ToolMessage(content='Table of Contents\\n\\n\\n\\nAgent System Overview\\n\\nComponent One: Planning\\n\\nTask Decomposition\\n\\nSelf-Reflection\\n\\n\\nComponent Two: Memory\\n\\nTypes of Memory\\n\\nMaximum Inner Product Search (MIPS)\\n\\n\\nComponent Three: Tool Use\\n\\nCase Studies\\n\\nScientific Discovery Agent\\n\\nGenerative Agents Simulation\\n\\nProof-of-Concept Examples\\n\\n\\nChallenges\\n\\nCitation\\n\\nReferences\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to 
interact with other agents.', name='retrieve_blog_posts', id='c7b3f250-b7c2-43a3-a852-8c2603f10fc0', tool_call_id='call_sDky13ZhyfzMmoNr0vO79i9n')]}\n", - "'\\n---\\n'\n", - "---GENERATE---\n", - "18:32:50 httpx INFO HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "\"Output from node 'generate':\"\n", - "'---'\n", - "{ 'messages': [ 'Lilian Weng discusses short-term memory as utilizing '\n", - " 'in-context learning for the model to learn and long-term '\n", - " 'memory as enabling the agent to retain and recall information '\n", - " 'over extended periods by leveraging an external vector store '\n", - " 'for fast retrieval.']}\n", - "'\\n---\\n'\n" - ] - } - ], - "source": [ - "import pprint\n", - "\n", - "inputs = {\n", - " \"messages\": [\n", - " (\"user\", \"What does Lilian Weng say about the types of agent memory?\"),\n", - " ]\n", - "}\n", - "for output in graph.stream(inputs):\n", - " for key, value in output.items():\n", - " pprint.pprint(f\"Output from node '{key}':\")\n", - " pprint.pprint(\"---\")\n", - " pprint.pprint(value, indent=2, width=80, depth=None)\n", - " pprint.pprint(\"\\n---\\n\")" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/python-recipes/agents/01_crewai_langgraph_redis.ipynb b/python-recipes/agents/01_crewai_langgraph_redis.ipynb index 2bd4ae1b..805b4d01 100644 --- a/python-recipes/agents/01_crewai_langgraph_redis.ipynb +++ b/python-recipes/agents/01_crewai_langgraph_redis.ipynb @@ -41,7 +41,7 @@ "outputs": [], "source": [ "%pip 
install -U --quiet crewai==0.76.2\n", - "%pip install -U --quiet langchain langchain-openai langchain-redis langgraph" + "%pip install -U --quiet langchain langchain-openai \"langchain-redis>=0.2.0\" langgraph" ] }, { @@ -1103,7 +1103,8 @@ "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.11.9" } }, "nbformat": 4, diff --git a/python-recipes/agents/02_full_featured_agent.ipynb b/python-recipes/agents/02_full_featured_agent.ipynb index dfd6bbd4..cb1ad606 100644 --- a/python-recipes/agents/02_full_featured_agent.ipynb +++ b/python-recipes/agents/02_full_featured_agent.ipynb @@ -36,7 +36,7 @@ }, "outputs": [], "source": [ - "%pip install -q langchain langchain-openai langchain-redis langgraph sentence-transformers" + "%pip install -q langchain langchain-openai \"langchain-redis>=0.2.0\" langgraph sentence-transformers" ] }, { diff --git a/python-recipes/computer-vision/00_facial_recognition_facenet.ipynb b/python-recipes/computer-vision/00_facial_recognition_facenet.ipynb index 3d1189bc..cc6592f7 100644 --- a/python-recipes/computer-vision/00_facial_recognition_facenet.ipynb +++ b/python-recipes/computer-vision/00_facial_recognition_facenet.ipynb @@ -350,8 +350,7 @@ " }\n", " ]\n", " }\n", - " index = SearchIndex.from_dict(schema)\n", - " index.set_client(client)\n", + " index = SearchIndex.from_dict(schema, redis_client=client)\n", " index.create(overwrite=True)\n", " return index\n", "\n", diff --git a/python-recipes/llm-session-manager/00_llm_session_manager.ipynb b/python-recipes/llm-session-manager/00_llm_session_manager.ipynb index 982e38dd..83b9f6d3 100644 --- a/python-recipes/llm-session-manager/00_llm_session_manager.ipynb +++ b/python-recipes/llm-session-manager/00_llm_session_manager.ipynb @@ -31,7 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install cohere redisvl sentence-transformers" + "%pip install cohere \"redisvl>=0.4.1\" sentence-transformers" ] }, { @@ -668,7 +668,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/python-recipes/llm-session-manager/01_multiple_sessions.ipynb b/python-recipes/llm-session-manager/01_multiple_sessions.ipynb index 316decb7..f6e30546 100644 --- a/python-recipes/llm-session-manager/01_multiple_sessions.ipynb +++ b/python-recipes/llm-session-manager/01_multiple_sessions.ipynb @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install cohere redisvl sentence-transformers" + "%pip install cohere \"redisvl>=0.4.1\" sentence-transformers" ] }, { diff --git a/python-recipes/recommendation-systems/00_content_filtering.ipynb b/python-recipes/recommendation-systems/00_content_filtering.ipynb index 0cd4c093..56b7cff0 100644 --- a/python-recipes/recommendation-systems/00_content_filtering.ipynb +++ b/python-recipes/recommendation-systems/00_content_filtering.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "%pip install -q redis redisvl sentence_transformers pandas requests" + "%pip install -q redis \"redisvl>=0.4.1\" sentence_transformers pandas requests" ] }, { diff --git a/python-recipes/recommendation-systems/01_collaborative_filtering.ipynb b/python-recipes/recommendation-systems/01_collaborative_filtering.ipynb index 77638988..382b98a0 100644 --- a/python-recipes/recommendation-systems/01_collaborative_filtering.ipynb +++ b/python-recipes/recommendation-systems/01_collaborative_filtering.ipynb @@ -65,7 +65,7 @@ } ], "source": [ - "%pip install redis redisvl pandas requests\n", + "%pip install redis \"redisvl>=0.4.1\" pandas requests\n", "%pip install numpy==1.25.0 scikit-surprise==1.1.3" ] }, diff --git a/python-recipes/recommendation-systems/02_two_towers.ipynb b/python-recipes/recommendation-systems/02_two_towers.ipynb index 2ec66d6d..ef034b10 100644 --- a/python-recipes/recommendation-systems/02_two_towers.ipynb +++ b/python-recipes/recommendation-systems/02_two_towers.ipynb @@ -44,7 
+44,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -q redis redisvl pandas torch requests scikit-learn" + "%pip install -q redis \"redisvl>=0.4.1\" pandas torch requests scikit-learn" ] }, { diff --git a/python-recipes/semantic-cache/doc2cache_llama3_1.ipynb b/python-recipes/semantic-cache/doc2cache_llama3_1.ipynb index 60de0ded..f87f354d 100644 --- a/python-recipes/semantic-cache/doc2cache_llama3_1.ipynb +++ b/python-recipes/semantic-cache/doc2cache_llama3_1.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "%pip install -q redisvl>=0.3.3 unstructured[pdf] sentence-transformers openai\n", + "%pip install -q \"redisvl>=0.4.1\" unstructured[pdf] sentence-transformers openai\n", "%pip install -q langchain-core langchain-community pypdf rapidocr-onnxruntime" ] }, diff --git a/python-recipes/semantic-cache/semantic_caching_gemini.ipynb b/python-recipes/semantic-cache/semantic_caching_gemini.ipynb index 7145a3bf..8ba17b8a 100644 --- a/python-recipes/semantic-cache/semantic_caching_gemini.ipynb +++ b/python-recipes/semantic-cache/semantic_caching_gemini.ipynb @@ -53,7 +53,7 @@ }, "outputs": [], "source": [ - "%pip install -q redisvl>=0.3.0 unstructured[pdf]\n", + "%pip install -q \"redisvl>=0.4.1\" unstructured[pdf]\n", "%pip install -q llama-parse llama-index-readers-file\n", "%pip install -q langchain langchain-google-vertexai" ] diff --git a/python-recipes/semantic-router/00_semantic_routing.ipynb b/python-recipes/semantic-router/00_semantic_routing.ipynb index 0b6f98e7..cdf57fbc 100644 --- a/python-recipes/semantic-router/00_semantic_routing.ipynb +++ b/python-recipes/semantic-router/00_semantic_routing.ipynb @@ -42,7 +42,7 @@ } ], "source": [ - "%pip install -q redisvl sentence-transformers" + "%pip install -q \"redisvl>=0.4.1\" sentence-transformers" ] }, { @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 17, "id": "aefda1d1", "metadata": {}, "outputs": [], @@ -132,7 +132,7 @@ }, { "cell_type": "code", 
- "execution_count": 4, + "execution_count": 18, "id": "c52d454a", "metadata": {}, "outputs": [], @@ -170,17 +170,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 19, "id": "b986bf8d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RouteMatch(name='block_list', distance=0.375403106213)" + "RouteMatch(name='block_list', distance=0.375402927399)" ] }, - "execution_count": 5, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -219,12 +219,12 @@ "\n", "Each route has a set of references that cover the \"semantic surface area\" of the\n", "route. The incoming query from a user needs to be semantically similar to one or\n", - "more of the references in order to \"match\" on the route." + "more of the references in order to \"match\" on the route. Note that each route can have its own distinct `distance_threshold` that defines what is considered a match for the particular query. " ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 20, "id": "60ad280c", "metadata": {}, "outputs": [], @@ -239,7 +239,8 @@ " \"tell me about the newest gadgets\",\n", " \"what's trending in tech?\"\n", " ],\n", - " metadata={\"category\": \"tech\", \"priority\": 1}\n", + " metadata={\"category\": \"tech\", \"priority\": 1},\n", + " distance_threshold=0.5\n", ")\n", "\n", "sports = Route(\n", @@ -251,7 +252,8 @@ " \"sports\",\n", " \"basketball and football\"\n", " ],\n", - " metadata={\"category\": \"sports\", \"priority\": 2}\n", + " metadata={\"category\": \"sports\", \"priority\": 2},\n", + " distance_threshold=0.7\n", ")\n", "\n", "entertainment = Route(\n", @@ -261,7 +263,8 @@ " \"who won the best actor award?\",\n", " \"what's new in the entertainment industry?\"\n", " ],\n", - " metadata={\"category\": \"entertainment\", \"priority\": 3}\n", + " metadata={\"category\": \"entertainment\", \"priority\": 3},\n", + " distance_threshold=0.6\n", ")" ] }, @@ -277,7 +280,7 @@ }, { "cell_type": "code", -
"execution_count": 7, + "execution_count": 21, "id": "e80aaf84", "metadata": {}, "outputs": [], @@ -306,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 22, "id": "3caedb77", "metadata": {}, "outputs": [ @@ -348,17 +351,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 23, "id": "5b0e3208", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "RouteMatch(name='technology', distance=0.119614601135)" + "RouteMatch(name='technology', distance=0.419145862261)" ] }, - "execution_count": 9, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -371,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 24, "id": "ef90a1b8", "metadata": {}, "outputs": [ @@ -381,7 +384,7 @@ "RouteMatch(name=None, distance=None)" ] }, - "execution_count": 10, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -392,29 +395,6 @@ "route_match" ] }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a937b471", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RouteMatch(name='sports', distance=0.554210424423)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Toggle the runtime distance threshold\n", - "route_match = multi_topic_router(\"Which basketball team will win the NBA finals?\", distance_threshold=0.7)\n", - "route_match" - ] - }, { "cell_type": "markdown", "id": "c3f8600a", @@ -425,44 +405,42 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 34, "id": "70335f93", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[RouteMatch(name='sports', distance=0.758580780029),\n", - " RouteMatch(name='entertainment', distance=0.812423845132),\n", - " RouteMatch(name='technology', distance=0.884235262871)]" + "[RouteMatch(name='sports', distance=0.274198234081),\n", + " RouteMatch(name='entertainment', distance=0.521298646927)]" 
] }, - "execution_count": 12, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Perform multi-class classification with route_many() -- toggle the max_k and the distance_threshold\n", - "route_matches = multi_topic_router.route_many(\"Lebron James\", distance_threshold=1.0, max_k=3)\n", + "route_matches = multi_topic_router.route_many(\"entertainment and sports\", max_k=3)\n", "route_matches" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 35, "id": "874b80fc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[RouteMatch(name='sports', distance=0.663254141808),\n", - " RouteMatch(name='entertainment', distance=0.712985336781),\n", - " RouteMatch(name='technology', distance=0.832674443722)]" + "[RouteMatch(name='sports', distance=0.263298630714),\n", + " RouteMatch(name='entertainment', distance=0.512374281883)]" ] }, - "execution_count": 14, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -471,7 +449,7 @@ "# Toggle the aggregation method -- note the different distances in the result\n", "from redisvl.extensions.router.schema import DistanceAggregationMethod\n", "\n", - "route_matches = multi_topic_router.route_many(\"Lebron James\", aggregation_method=DistanceAggregationMethod.min, distance_threshold=1.0, max_k=3)\n", + "route_matches = multi_topic_router.route_many(\"sports and entertainment\", aggregation_method=DistanceAggregationMethod.min, max_k=3)\n", "route_matches" ] }, @@ -487,7 +465,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 36, "id": "86919de5", "metadata": {}, "outputs": [], @@ -495,25 +473,23 @@ "from redisvl.extensions.router import RoutingConfig\n", "\n", "multi_topic_router.update_routing_config(\n", - " RoutingConfig(distance_threshold=1.0, aggregation_method=DistanceAggregationMethod.min, max_k=3)\n", + " RoutingConfig(aggregation_method=DistanceAggregationMethod.min, max_k=3)\n", ")" ] }, { "cell_type": 
"code", - "execution_count": 16, + "execution_count": 37, "id": "cb883785", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[RouteMatch(name='sports', distance=0.663254141808),\n", - " RouteMatch(name='entertainment', distance=0.712985336781),\n", - " RouteMatch(name='technology', distance=0.832674443722)]" + "[RouteMatch(name='sports', distance=0.663253903389)]" ] }, - "execution_count": 16, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -533,7 +509,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 38, "id": "f5ea2e61", "metadata": {}, "outputs": [ @@ -545,27 +521,28 @@ " 'references': ['what are the latest advancements in AI?',\n", " 'tell me about the newest gadgets',\n", " \"what's trending in tech?\"],\n", - " 'metadata': {'category': 'tech', 'priority': '1'}},\n", + " 'metadata': {'category': 'tech', 'priority': 1},\n", + " 'distance_threshold': 0.5},\n", " {'name': 'sports',\n", " 'references': ['who won the game last night?',\n", " 'tell me about the upcoming sports events',\n", " \"what's the latest in the world of sports?\",\n", " 'sports',\n", " 'basketball and football'],\n", - " 'metadata': {'category': 'sports', 'priority': '2'}},\n", + " 'metadata': {'category': 'sports', 'priority': 2},\n", + " 'distance_threshold': 0.7},\n", " {'name': 'entertainment',\n", " 'references': ['what are the top movies right now?',\n", " 'who won the best actor award?',\n", " \"what's new in the entertainment industry?\"],\n", - " 'metadata': {'category': 'entertainment', 'priority': '3'}}],\n", + " 'metadata': {'category': 'entertainment', 'priority': 3},\n", + " 'distance_threshold': 0.6}],\n", " 'vectorizer': {'type': 'hf',\n", " 'model': 'sentence-transformers/all-mpnet-base-v2'},\n", - " 'routing_config': {'distance_threshold': 1.0,\n", - " 'max_k': 3,\n", - " 'aggregation_method': 'min'}}" + " 'routing_config': {'max_k': 3, 'aggregation_method': 'min'}}" ] }, - "execution_count": 18, + 
"execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -576,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 42, "id": "36ae6f50", "metadata": {}, "outputs": [ @@ -584,19 +561,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "10:50:18 redisvl.index.index INFO Index already exists, not overwriting.\n" + "11:18:33 redisvl.index.index INFO Index already exists, not overwriting.\n" ] } ], "source": [ - "router2 = SemanticRouter.from_dict(multi_topic_router.to_dict(), redis_url=\"redis://localhost:6379\")\n", - "\n", - "assert router2 == multi_topic_router" + "router2 = SemanticRouter.from_dict(multi_topic_router.to_dict(), redis_url=\"redis://localhost:6379\")" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 43, "id": "f601b065", "metadata": {}, "outputs": [], @@ -606,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 44, "id": "63e4a847", "metadata": {}, "outputs": [ @@ -614,14 +589,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "10:50:43 redisvl.index.index INFO Index already exists, not overwriting.\n" + "11:18:53 redisvl.index.index INFO Index already exists, not overwriting.\n" ] } ], "source": [ - "router3 = SemanticRouter.from_yaml(\"router.yaml\", redis_url=\"redis://localhost:6379\")\n", - "\n", - "assert router3 == router2 == multi_topic_router" + "router3 = SemanticRouter.from_yaml(\"router.yaml\", redis_url=\"redis://localhost:6379\")" ] }, { diff --git a/python-recipes/vector-search/01_redisvl.ipynb b/python-recipes/vector-search/01_redisvl.ipynb index b4bc70d2..c31995de 100644 --- a/python-recipes/vector-search/01_redisvl.ipynb +++ b/python-recipes/vector-search/01_redisvl.ipynb @@ -97,7 +97,7 @@ } ], "source": [ - "%pip install -q redis redisvl numpy sentence-transformers pandas" + "%pip install -q redis \"redisvl>=0.4.1\" numpy sentence-transformers pandas" ] }, { diff --git 
a/python-recipes/vector-search/02_hybrid_search.ipynb b/python-recipes/vector-search/02_hybrid_search.ipynb index c4c701df..9a796955 100644 --- a/python-recipes/vector-search/02_hybrid_search.ipynb +++ b/python-recipes/vector-search/02_hybrid_search.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -q \"redisvl>=0.3.5\" sentence-transformers pandas \"redis>=5.2.0\"" + "%pip install -q \"redisvl>=0.4.1\" sentence-transformers pandas \"redis>=5.2.0\"" ] }, { diff --git a/python-recipes/vector-search/03_float16_support.ipynb b/python-recipes/vector-search/03_float16_support.ipynb index 16445e5e..81743c99 100644 --- a/python-recipes/vector-search/03_float16_support.ipynb +++ b/python-recipes/vector-search/03_float16_support.ipynb @@ -39,7 +39,7 @@ } ], "source": [ - "%pip install -q 'redis>=5.0.8' 'redisvl>=0.3.4' numpy sentence-transformers" + "%pip install -q \"redis>=5.0.8\" \"redisvl>=0.4.1\" numpy sentence-transformers" ] }, {