diff --git a/.github/workflows/production_run_complete_llm.yml b/.github/workflows/production_run_complete_llm.yml index 354c9f9e..c57b84d5 100644 --- a/.github/workflows/production_run_complete_llm.yml +++ b/.github/workflows/production_run_complete_llm.yml @@ -14,7 +14,7 @@ jobs: run-staging-workflow: runs-on: ubuntu-latest env: - ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }} + ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }} ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }} ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }} @@ -46,7 +46,7 @@ jobs: working-directory: ./llm-complete-guide run: | zenml init - zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY + zenml connect --url $ZENML_STORE_URL --api-key $ZENML_API_KEY - name: Set stack (Production) working-directory: ./llm-complete-guide @@ -56,4 +56,4 @@ jobs: - name: Run pipeline, create pipeline, configure trigger (Production) working-directory: ./llm-complete-guide run: | - python gh_action_rag.py --no-cache --create-template ----event-source-id --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml \ No newline at end of file + python gh_action_rag.py --no-cache --create-template ----event-source-id --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config production/rag.yaml \ No newline at end of file diff --git a/.github/workflows/staging_run_complete_llm.yml b/.github/workflows/staging_run_complete_llm.yml index 57125f84..2c7ad35b 100644 --- a/.github/workflows/staging_run_complete_llm.yml +++ b/.github/workflows/staging_run_complete_llm.yml @@ -12,7 +12,7 @@ jobs: run-staging-workflow: runs-on: ubuntu-latest env: - ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }} + ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }} ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }} 
ZENML_STAGING_STACK : 67166d73-a44e-42f9-b67f-011e9afab9b5 # Set this to your staging stack ID ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }} @@ -42,7 +42,7 @@ jobs: working-directory: ./llm-complete-guide run: | zenml init - zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY + zenml connect --url $ZENML_STORE_URL --api-key $ZENML_API_KEY - name: Set stack (Staging) working-directory: ./llm-complete-guide @@ -52,4 +52,4 @@ jobs: - name: Run pipeline (Staging) working-directory: ./llm-complete-guide run: | - python gh_action_rag.py --no-cache --config rag_local_dev.yaml \ No newline at end of file + python gh_action_rag.py --no-cache --config staging/rag.yaml \ No newline at end of file diff --git a/llm-complete-guide/README.md b/llm-complete-guide/README.md index 5e5844c4..75f7586e 100644 --- a/llm-complete-guide/README.md +++ b/llm-complete-guide/README.md @@ -23,7 +23,7 @@ instructions are provided below for how to set that up. ## 📽️ Watch the webinars -We've recently been holding some webinars about this repository and project. Watche the videos below if you want an introduction and context around the code and ideas covered in this project. +We've recently been holding some webinars about this repository and project. Watch the videos below if you want an introduction and context around the code and ideas covered in this project. [](https://www.youtube.com/watch?v=PazRMY8bo3U) @@ -45,7 +45,7 @@ pip install -r requirements.txt Depending on your hardware you may run into some issues when running the `pip install` command with the `flash_attn` package. In that case running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation` -could help you. +could help you. You may also need to install torch separately. In order to use the default LLM for this query, you'll need an account and an API key from OpenAI specified as a ZenML secret: @@ -85,7 +85,7 @@ to run the pipelines in the correct order.
You can run the script with the following command: ```shell -python run.py --rag +python run.py rag ``` This will run the basic RAG pipeline, which scrapes the ZenML documentation and @@ -100,7 +100,7 @@ use for the LLM. When you're ready to make the query, run the following command: ```shell -python run.py --query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4 +python run.py query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4 ``` Alternative options for LLMs to use include: @@ -164,7 +164,7 @@ the RAG pipeline. To run the evaluation pipeline, you can use the following command: ```shell -python run.py --evaluation +python run.py evaluation ``` You'll need to have first run the RAG pipeline to have the necessary assets in @@ -182,7 +182,7 @@ To run the `distilabel` synthetic data generation pipeline, you can use the foll ```shell pip install -r requirements-argilla.txt # special requirements -python run.py --synthetic +python run.py synthetic ``` You will also need to have set up and connected to an Argilla instance for this @@ -221,7 +221,7 @@ commands: ```shell pip install -r requirements-argilla.txt # special requirements -python run.py --embeddings +python run.py embeddings ``` *Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka diff --git a/llm-complete-guide/configs/embeddings.yaml b/llm-complete-guide/configs/dev/embeddings.yaml similarity index 88% rename from llm-complete-guide/configs/embeddings.yaml rename to llm-complete-guide/configs/dev/embeddings.yaml index 6fb2cc20..4f00a98a 100644 --- a/llm-complete-guide/configs/embeddings.yaml +++ b/llm-complete-guide/configs/dev/embeddings.yaml @@ -3,7 +3,6 @@ # environment configuration settings: docker: - parent_image: "zenmldocker/prepare-release:base-0.68.0" requirements: - langchain-community - ratelimit @@ -27,13 +26,14 
@@ settings: - datasets - torch - pygithub + - openai environment: ZENML_PROJECT_SECRET_NAME: llm_complete # configuration of the Model Control Plane model: - name: finetuned-zenml-docs-embeddings + name: dev_finetuned-zenml-docs-embeddings version: latest license: Apache 2.0 description: Finetuned LLM on ZenML docs diff --git a/llm-complete-guide/configs/rag_local_dev.yaml b/llm-complete-guide/configs/dev/rag.yaml similarity index 85% rename from llm-complete-guide/configs/rag_local_dev.yaml rename to llm-complete-guide/configs/dev/rag.yaml index 334044b5..637da8bb 100644 --- a/llm-complete-guide/configs/rag_local_dev.yaml +++ b/llm-complete-guide/configs/dev/rag.yaml @@ -23,7 +23,7 @@ settings: # configuration of the Model Control Plane model: - name: finetuned-zenml-docs-embeddings + name: dev_finetuned-zenml-docs-embeddings license: Apache 2.0 description: Finetuned LLM on ZenML docs tags: ["rag", "finetuned"] @@ -31,4 +31,5 @@ model: steps: url_scraper: parameters: - docs_url: https://docs.zenml.io/stack-components/orchestrators + docs_url: https://docs.zenml.io/ + use_dev_set: true diff --git a/llm-complete-guide/configs/rag_eval.yaml b/llm-complete-guide/configs/dev/rag_eval.yaml similarity index 80% rename from llm-complete-guide/configs/rag_eval.yaml rename to llm-complete-guide/configs/dev/rag_eval.yaml index 6116f3bc..fd8df1e5 100644 --- a/llm-complete-guide/configs/rag_eval.yaml +++ b/llm-complete-guide/configs/dev/rag_eval.yaml @@ -16,7 +16,8 @@ settings: # configuration of the Model Control Plane model: - name: finetuned-zenml-docs-embeddings + name: dev_finetuned-zenml-docs-embeddings license: Apache 2.0 description: Finetuned LLM on ZenML docs - tags: ["rag", "finetuned"] \ No newline at end of file + tags: ["rag", "finetuned"] + version: latest \ No newline at end of file diff --git a/llm-complete-guide/configs/synthetic.yaml b/llm-complete-guide/configs/dev/synthetic.yaml similarity index 92% rename from llm-complete-guide/configs/synthetic.yaml 
rename to llm-complete-guide/configs/dev/synthetic.yaml index 6b052429..bb7ebbf7 100644 --- a/llm-complete-guide/configs/synthetic.yaml +++ b/llm-complete-guide/configs/dev/synthetic.yaml @@ -25,13 +25,14 @@ settings: - torch - distilabel - pygithub + - openai environment: ZENML_PROJECT_SECRET_NAME: llm_complete # configuration of the Model Control Plane model: - name: finetuned-zenml-docs-embeddings + name: dev_finetuned-zenml-docs-embeddings version: latest license: Apache 2.0 description: Finetuned LLM on ZenML docs diff --git a/llm-complete-guide/configs/production/embeddings.yaml b/llm-complete-guide/configs/production/embeddings.yaml new file mode 100644 index 00000000..7d027743 --- /dev/null +++ b/llm-complete-guide/configs/production/embeddings.yaml @@ -0,0 +1,45 @@ +# enable_cache: False + +# environment configuration +settings: + docker: + requirements: + - ratelimit + - pgvector + - psycopg2-binary + - beautifulsoup4 + - unstructured + - pandas + - numpy + - sentence-transformers>=3 + - transformers[torch]==4.43.1 + - litellm + - ollama + - tiktoken + - umap-learn + - matplotlib + - pyarrow + - rerankers[flashrank] + - datasets + - torch + - pygithub + - openai + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + + +# configuration of the Model Control Plane +model: + name: prod_finetuned-zenml-docs-embeddings + version: latest + license: Apache 2.0 + description: Finetuned LLM on ZenML docs + tags: ["rag", "finetuned"] + +steps: + finetune: + step_operator: "gcp_a100" + settings: + step_operator.vertex: + accelerator_count: 1 + accelerator_type: NVIDIA_TESLA_A100 \ No newline at end of file diff --git a/llm-complete-guide/configs/rag_gcp.yaml b/llm-complete-guide/configs/production/eval.yaml similarity index 51% rename from llm-complete-guide/configs/rag_gcp.yaml rename to llm-complete-guide/configs/production/eval.yaml index 462b5790..7b07c33b 100644 --- a/llm-complete-guide/configs/rag_gcp.yaml +++ 
b/llm-complete-guide/configs/production/eval.yaml @@ -1,3 +1,5 @@ +enable_cache: False + # environment configuration settings: docker: @@ -11,31 +13,20 @@ settings: - psycopg2-binary - tiktoken - ratelimit - - rerankers + - rerankers[flashrank] + - matplotlib + - pillow - pygithub environment: ZENML_PROJECT_SECRET_NAME: llm_complete ZENML_ENABLE_RICH_TRACEBACK: FALSE ZENML_LOGGING_VERBOSITY: INFO -steps: - url_scraper: - parameters: - docs_url: https://docs.zenml.io - repo_url: https://github.com/zenml-io/zenml - website_url: https://zenml.io - -# generate_embeddings: -# step_operator: "terraform-gcp-6c0fd52233ca" -# settings: -# step_operator.vertex: -# accelerator_type: "NVIDIA_TESLA_P100" -# accelerator_count: 1 -# machine_type: "n1-standard-8" - # configuration of the Model Control Plane model: - name: finetuned-zenml-docs-embeddings + name: prod_finetuned-zenml-docs-embeddings + version: latest license: Apache 2.0 description: Finetuned LLM on ZenML docs - tags: ["rag", "finetuned"] \ No newline at end of file + tags: ["rag", "finetuned"] + limitations: "Only works for ZenML documentation. Not generalizable to other domains. Entirely built with synthetic data. The data is also quite noisy on account of how the chunks were split."
\ No newline at end of file diff --git a/llm-complete-guide/configs/production/rag.yaml b/llm-complete-guide/configs/production/rag.yaml new file mode 100644 index 00000000..59ad858f --- /dev/null +++ b/llm-complete-guide/configs/production/rag.yaml @@ -0,0 +1,46 @@ +enable_cache: True + +# environment configuration +settings: + docker: + requirements: + - unstructured + - sentence-transformers>=3 + - pgvector + - datasets + - litellm + - numpy + - psycopg2-binary + - tiktoken + - ratelimit + - rerankers + - pygithub + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + ZENML_ENABLE_RICH_TRACEBACK: FALSE + ZENML_LOGGING_VERBOSITY: INFO + + +# configuration of the Model Control Plane +model: + name: prod_finetuned-zenml-docs-embeddings + license: Apache 2.0 + description: A fine-tuned embeddings model for ZenML documentation. Used for RAG retrieval. + tags: ["rag", "finetuned"] + limitations: Only works for ZenML documentation. Not generalizable to other domains. Entirely built with synthetic data. The data is also quite noisy on account of how the chunks were split. + trade_offs: Focused on a specific RAG retrieval use case. Not generalizable to other domains.
+ audience: ZenML users + use_cases: RAG retrieval + +steps: + url_scraper: + parameters: + docs_url: https://docs.zenml.io + use_dev_set: false + enable_cache: true +# generate_embeddings: +# step_operator: "sagemaker" +# settings: +# step_operator.sagemaker: +# accelerator_count: 1 +# accelerator_type: NVIDIA_TESLA_A100 \ No newline at end of file diff --git a/llm-complete-guide/configs/production/synthetic.yaml b/llm-complete-guide/configs/production/synthetic.yaml new file mode 100644 index 00000000..5d5bac27 --- /dev/null +++ b/llm-complete-guide/configs/production/synthetic.yaml @@ -0,0 +1,37 @@ +# environment configuration +settings: + docker: + requirements: + - ratelimit + - pgvector + - psycopg2-binary + - beautifulsoup4 + - unstructured + - pandas + - numpy + - sentence-transformers>=3 + - transformers==4.43.1 + - litellm + - ollama + - tiktoken + - umap-learn + - matplotlib + - pyarrow + - rerankers[flashrank] + - datasets + - torch + - distilabel + - argilla + - pygithub + - openai + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + + +# configuration of the Model Control Plane +model: + name: prod_finetuned-zenml-docs-embeddings + version: latest + license: Apache 2.0 + description: Finetuned LLM on ZenML docs + tags: ["rag", "finetuned"] diff --git a/llm-complete-guide/configs/staging/embeddings.yaml b/llm-complete-guide/configs/staging/embeddings.yaml new file mode 100644 index 00000000..d8bbfc45 --- /dev/null +++ b/llm-complete-guide/configs/staging/embeddings.yaml @@ -0,0 +1,37 @@ +# enable_cache: False + +# environment configuration +settings: + docker: + requirements: + - ratelimit + - pgvector + - psycopg2-binary + - beautifulsoup4 + - unstructured + - pandas + - numpy + - sentence-transformers>=3 + - transformers[torch]==4.43.1 + - litellm + - ollama + - tiktoken + - umap-learn + - matplotlib + - pyarrow + - rerankers[flashrank] + - datasets + - torch + - pygithub + - openai + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + + 
+# configuration of the Model Control Plane +model: + name: staging_finetuned-zenml-docs-embeddings + version: latest + license: Apache 2.0 + description: Finetuned LLM on ZenML docs + tags: ["rag", "finetuned"] \ No newline at end of file diff --git a/llm-complete-guide/configs/staging/eval.yaml b/llm-complete-guide/configs/staging/eval.yaml new file mode 100644 index 00000000..aee20b09 --- /dev/null +++ b/llm-complete-guide/configs/staging/eval.yaml @@ -0,0 +1,32 @@ +enable_cache: False + +# environment configuration +settings: + docker: + requirements: + - unstructured + - sentence-transformers>=3 + - pgvector + - datasets + - litellm + - numpy + - psycopg2-binary + - tiktoken + - ratelimit + - rerankers[flashrank] + - matplotlib + - pillow + - pygithub + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + ZENML_ENABLE_RICH_TRACEBACK: FALSE + ZENML_LOGGING_VERBOSITY: INFO + +# configuration of the Model Control Plane +model: + name: staging_finetuned-zenml-docs-embeddings + version: latest + license: Apache 2.0 + description: Finetuned LLM on ZenML docs + tags: ["rag", "finetuned"] + limitations: "Only works for ZenML documentation. Not generalizable to other domains. Entirely built with synthetic data. The data is also quite noisy on account of how the chunks were split."
\ No newline at end of file diff --git a/llm-complete-guide/configs/staging/rag.yaml b/llm-complete-guide/configs/staging/rag.yaml new file mode 100644 index 00000000..5107f01e --- /dev/null +++ b/llm-complete-guide/configs/staging/rag.yaml @@ -0,0 +1,41 @@ +enable_cache: False + +# environment configuration +settings: + docker: + requirements: + - unstructured + - sentence-transformers>=3 + - pgvector + - datasets + - litellm + - numpy + - psycopg2-binary + - tiktoken + - ratelimit + - rerankers + - pygithub + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + ZENML_ENABLE_RICH_TRACEBACK: FALSE + ZENML_LOGGING_VERBOSITY: INFO + + +# configuration of the Model Control Plane +model: + name: staging_finetuned-zenml-docs-embeddings + license: Apache 2.0 + description: A fine-tuned embeddings model for ZenML documentation. Used for RAG retrieval. + tags: ["rag", "finetuned"] + limitations: Only works for ZenML documentation. Not generalizable to other domains. Entirely built with synthetic data. The data is also quite noisy on account of how the chunks were split. + trade_offs: Focused on a specific RAG retrieval use case. Not generalizable to other domains.
+ audience: ZenML users + use_cases: RAG retrieval + +steps: + url_scraper: + parameters: + docs_url: https://docs.zenml.io + use_dev_set: false + enable_cache: true + step_operator: "gcp_a100" \ No newline at end of file diff --git a/llm-complete-guide/configs/staging/synthetic.yaml b/llm-complete-guide/configs/staging/synthetic.yaml new file mode 100644 index 00000000..ba16d590 --- /dev/null +++ b/llm-complete-guide/configs/staging/synthetic.yaml @@ -0,0 +1,37 @@ +# environment configuration +settings: + docker: + requirements: + - ratelimit + - pgvector + - psycopg2-binary + - beautifulsoup4 + - unstructured + - pandas + - numpy + - sentence-transformers>=3 + - transformers==4.43.1 + - litellm + - ollama + - tiktoken + - umap-learn + - matplotlib + - pyarrow + - rerankers[flashrank] + - datasets + - torch + - distilabel + - argilla + - pygithub + - openai + environment: + ZENML_PROJECT_SECRET_NAME: llm_complete + + +# configuration of the Model Control Plane +model: + name: staging_finetuned-zenml-docs-embeddings + version: latest + license: Apache 2.0 + description: Finetuned LLM on ZenML docs + tags: ["rag", "finetuned"] diff --git a/llm-complete-guide/notebooks/reranking.ipynb b/llm-complete-guide/notebooks/reranking.ipynb index 94342811..80f8507a 100644 --- a/llm-complete-guide/notebooks/reranking.ipynb +++ b/llm-complete-guide/notebooks/reranking.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12,128 +12,9 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading TransformerRanker model mixedbread-ai/mxbai-rerank-large-v1\n", - "No device set\n", - "Using device cuda\n", - "No dtype set\n", - "Using dtype torch.float16\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": 
"867edac78ccb49aea85b6e96c03c201b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "config.json: 0%| | 0.00/970 [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "917b3071bcc7408486af412a17a3636e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "model.safetensors: 0%| | 0.00/870M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded model mixedbread-ai/mxbai-rerank-large-v1\n", - "Using device cuda.\n", - "Using dtype torch.float16.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b28cce9cc011465688172e44656f7385", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/1.45k [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f7daf97b1ca44b9791e2df292169e392", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "spm.model: 0%| | 0.00/2.46M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2392573238c7454cb4fa95ae3d0e28a9", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "tokenizer.json: 0%| | 0.00/8.65M [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c44313849c8f4e9e825752931bd6bb6b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "added_tokens.json: 0%| | 0.00/23.0 [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "31d1ccfda744411d8ba7ea3429c9ff57", - 
"version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "special_tokens_map.json: 0%| | 0.00/970 [00:00, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# ranker = Reranker('cross-encoder')\n", "# ranker = Reranker('avsolatorio/GIST-large-Embedding-v0')\n", @@ -142,12 +23,14 @@ "# ranker = Reranker(\"t5\")\n", "# ranker = Reranker(\"unicamp-dl/InRanker-base\", model_type = \"t5\")\n", "# ranker = Reranker(\"colbert\")\n", - "ranker = Reranker(\"mixedbread-ai/mxbai-rerank-large-v1\", model_type=\"cross-encoder\")" + "ranker = Reranker(\n", + " \"mixedbread-ai/mxbai-rerank-large-v1\", model_type=\"cross-encoder\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -162,43 +45,9 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
RankedResults(\n", - " results=[\n", - " Result(doc_id=0, text='I like to play soccer', score=-1.2607421875, rank=1),\n", - " Result(doc_id=2, text='I like to play basketball', score=-1.2890625, rank=2),\n", - " Result(doc_id=1, text='I like to play football', score=-1.9384765625, rank=3),\n", - " Result(doc_id=3, text='I love dogs', score=-5.12109375, rank=4),\n", - " Result(doc_id=4, text='Catcher in the Rye is a great book', score=-6.19140625, rank=5)\n", - " ],\n", - " query=\"What's your favorite sport?\",\n", - " has_scores=True\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;35mRankedResults\u001b[0m\u001b[1m(\u001b[0m\n", - " \u001b[33mresults\u001b[0m=\u001b[1m[\u001b[0m\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m0\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I like to play soccer'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-1.2607421875\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m2\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I like to play basketball'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-1.2890625\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m2\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I like to play football'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-1.9384765625\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m3\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m3\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I love dogs'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-5.12109375\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m4\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m4\u001b[0m, 
\u001b[33mtext\u001b[0m=\u001b[32m'Catcher in the Rye is a great book'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-6.19140625\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m5\u001b[0m\u001b[1m)\u001b[0m\n", - " \u001b[1m]\u001b[0m,\n", - " \u001b[33mquery\u001b[0m=\u001b[32m\"What\u001b[0m\u001b[32m's your favorite sport?\"\u001b[0m,\n", - " \u001b[33mhas_scores\u001b[0m=\u001b[3;92mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "results = ranker.rank(query=\"What's your favorite sport?\", docs=texts)\n", "\n", @@ -207,35 +56,9 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n", - " 'I like to play soccer',\n", - " 'I like to play basketball',\n", - " 'I like to play football',\n", - " 'I love dogs',\n", - " 'Catcher in the Rye is a great book'\n", - "]\n", - "\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - " \u001b[32m'I like to play soccer'\u001b[0m,\n", - " \u001b[32m'I like to play basketball'\u001b[0m,\n", - " \u001b[32m'I like to play football'\u001b[0m,\n", - " \u001b[32m'I love dogs'\u001b[0m,\n", - " \u001b[32m'Catcher in the Rye is a great book'\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "print([document.text for document in results.results])" ] @@ -256,36 +79,24 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "attempted relative import with no known parent package", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msteps\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_retrieval\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m query_similar_docs\n", - "\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package" - ] - } - ], + "outputs": [], "source": [ "embedded_question = get_embeddings(question)\n", - " db_conn = get_db_conn()\n", - " num_docs = 20 if use_reranking else 5\n", - " # get (content, url) tuples for the top n similar documents\n", - " top_similar_docs = get_topn_similar_docs(\n", - " embedded_question, db_conn, n=num_docs, include_metadata=True\n", - " )\n", - "\n", - " if use_reranking:\n", - " urls = 
rerank_documents(question, top_similar_docs)[:5]\n", - " else:\n", - " urls = [doc[1] for doc in top_similar_docs] # Unpacking URLs\n", - "\n", - " return (question, url_ending, urls)\n" + "db_conn = get_db_conn()\n", + "num_docs = 20 if use_reranking else 5\n", + "# get (content, url) tuples for the top n similar documents\n", + "top_similar_docs = get_topn_similar_docs(\n", + " embedded_question, db_conn, n=num_docs, include_metadata=True\n", + ")\n", + "\n", + "if use_reranking:\n", + " urls = rerank_documents(question, top_similar_docs)[:5]\n", + "else:\n", + " urls = [doc[1] for doc in top_similar_docs] # Unpacking URLs\n", + "\n", + "return (question, url_ending, urls)" ] }, { @@ -297,32 +108,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ranked documents for query: quick brown fox\n", - "Document: A quick brown fox jumps over the lazy dog\n", - "Score: 0.6937165451385258\n", - "\n", - "Document: The quick brown fox jumps over the lazy dog\n", - "Score: 0.6928630071635998\n", - "\n", - "Document: The quick brown fox is quick and brown\n", - "Score: 0.6868308019742143\n", - "\n", - "Document: The quick brown fox is different from the lazy dog\n", - "Score: 0.6802242759508812\n", - "\n", - "Document: The lazy dog is lazy and sleepy\n", - "Score: 0.5727275080137214\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", @@ -338,7 +126,11 @@ "]\n", "\n", "# Toy queries and their corresponding relevant document indices\n", - "queries = [(\"quick fox\", [0, 1, 2]), (\"lazy dog\", [3, 4]), (\"brown fox\", [0, 1, 2, 4])]\n", + "queries = [\n", + " (\"quick fox\", [0, 1, 2]),\n", + " (\"lazy dog\", [3, 4]),\n", + " (\"brown fox\", [0, 1, 2, 4]),\n", + "]\n", "\n", "# Create TF-IDF vectorizer\n", "vectorizer = TfidfVectorizer()\n", @@ -355,7 +147,9 @@ " 
query_vector = vectorizer.transform([query])\n", " for doc_idx, doc_vector in enumerate(document_vectors):\n", " X_train.append(\n", - " np.concatenate((query_vector.toarray()[0], doc_vector.toarray()[0]))\n", + " np.concatenate(\n", + " (query_vector.toarray()[0], doc_vector.toarray()[0])\n", + " )\n", " )\n", " y_train.append(1 if doc_idx in relevant_docs else 0)\n", "\n", @@ -367,7 +161,9 @@ "scores = []\n", "\n", "for doc_vector in document_vectors:\n", - " input_vector = np.concatenate((query_vector.toarray()[0], doc_vector.toarray()[0]))\n", + " input_vector = np.concatenate(\n", + " (query_vector.toarray()[0], doc_vector.toarray()[0])\n", + " )\n", " score = reranker.predict_proba([input_vector])[0][1]\n", " scores.append(score)\n", "\n", @@ -381,28 +177,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading default cross-encoder model for language en\n", - "Warning: Model type could not be auto-mapped with the defaults list. Defaulting to TransformerRanker.\n", - "If your model is NOT intended to be ran as a one-label cross-encoder, please reload it and specify the model_type! Otherwise, you may ignore this warning. You may specify `model_type='cross-encoder'` to suppress this warning in the future.\n", - "Default Model: mixedbread-ai/mxbai-rerank-base-v1\n", - "Loading TransformerRanker model mixedbread-ai/mxbai-rerank-base-v1\n", - "No device set\n", - "Using device cuda\n", - "No dtype set\n", - "Using dtype torch.float16\n", - "Loaded model mixedbread-ai/mxbai-rerank-base-v1\n", - "Using device cuda.\n", - "Using dtype torch.float16.\n" - ] - } - ], + "outputs": [], "source": [ "from rerankers import Reranker\n", "\n", @@ -422,45 +199,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
RankedResults(\n", - " results=[\n", - " Result(doc_id=5, text='I like to play basketball', score=-0.46533203125, rank=1),\n", - " Result(doc_id=0, text='I like to play soccer', score=-0.7353515625, rank=2),\n", - " Result(doc_id=1, text='I like to play football', score=-0.9677734375, rank=3),\n", - " Result(doc_id=2, text='War and Peace is a great book', score=-5.40234375, rank=4),\n", - " Result(doc_id=3, text='I love dogs', score=-5.5859375, rank=5),\n", - " Result(doc_id=4, text=\"Ginger cats aren't very smart\", score=-5.94921875, rank=6)\n", - " ],\n", - " query=\"What's your favorite sport?\",\n", - " has_scores=True\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;35mRankedResults\u001b[0m\u001b[1m(\u001b[0m\n", - " \u001b[33mresults\u001b[0m=\u001b[1m[\u001b[0m\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m5\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I like to play basketball'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-0.46533203125\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m0\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I like to play soccer'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-0.7353515625\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m2\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m1\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I like to play football'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-0.9677734375\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m3\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m2\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'War and Peace is a great book'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-5.40234375\u001b[0m, 
\u001b[33mrank\u001b[0m=\u001b[1;36m4\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m3\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m'I love dogs'\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-5.5859375\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m5\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mResult\u001b[0m\u001b[1m(\u001b[0m\u001b[33mdoc_id\u001b[0m=\u001b[1;36m4\u001b[0m, \u001b[33mtext\u001b[0m=\u001b[32m\"Ginger\u001b[0m\u001b[32m cats aren't very smart\"\u001b[0m, \u001b[33mscore\u001b[0m=\u001b[1;36m-5.94921875\u001b[0m, \u001b[33mrank\u001b[0m=\u001b[1;36m6\u001b[0m\u001b[1m)\u001b[0m\n", - " \u001b[1m]\u001b[0m,\n", - " \u001b[33mquery\u001b[0m=\u001b[32m\"What\u001b[0m\u001b[32m's your favorite sport?\"\u001b[0m,\n", - " \u001b[33mhas_scores\u001b[0m=\u001b[3;92mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "print(results)" ] @@ -475,7 +216,7 @@ ], "metadata": { "kernelspec": { - "display_name": "new-rag", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -489,9 +230,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.9" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/llm-complete-guide/pipelines/llm_basic_rag.py b/llm-complete-guide/pipelines/llm_basic_rag.py index 895c4df3..82a97b21 100644 --- a/llm-complete-guide/pipelines/llm_basic_rag.py +++ b/llm-complete-guide/pipelines/llm_basic_rag.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from litellm import config_path from steps.populate_index import ( generate_embeddings, diff --git a/llm-complete-guide/run.py b/llm-complete-guide/run.py index 2152fda4..d224da93 100644 --- a/llm-complete-guide/run.py +++ b/llm-complete-guide/run.py @@ -13,8 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os import warnings +from pathlib import Path # Suppress the specific FutureWarning from huggingface_hub warnings.filterwarnings( @@ -62,33 +62,20 @@ Run the ZenML LLM RAG complete guide project pipelines. """ ) -@click.option( - "--rag", - "rag", - is_flag=True, - default=False, - help="Whether to run the pipeline that creates the dataset.", -) -@click.option( - "--deploy", - "deploy", - is_flag=True, - default=False, - help="Whether to deploy a Gradio app to serve the RAG functionality.", -) -@click.option( - "--evaluation", - "evaluation", - is_flag=True, - default=False, - help="Whether to run the evaluation pipeline.", -) -@click.option( - "--query", - "query", - type=str, - required=False, - help="Query the RAG model.", +@click.argument( + "pipeline", + type=click.Choice( + [ + "rag", + "deploy", + "evaluation", + "query", + "synthetic", + "embeddings", + "chunks", + ] + ), + required=True, ) @click.option( "--model", @@ -112,41 +99,20 @@ default=False, help="Disable cache.", ) -@click.option( - "--synthetic", - "synthetic", - is_flag=True, - default=False, - help="Run the synthetic data pipeline.", -) -@click.option( - "--embeddings", - "embeddings", - is_flag=True, - default=False, - help="Fine-tunes embeddings.", -) @click.option( "--argilla", - "argilla", + "use_argilla", is_flag=True, default=False, help="Uses Argilla annotations.", ) @click.option( "--reranked", - "reranked", + "use_reranker", is_flag=True, default=False, help="Whether to use the reranker.", ) -@click.option( - "--chunks", - "chunks", - 
is_flag=True, - default=False, - help="Generate chunks for Hugging Face dataset", -) @click.option( "--config", "config", @@ -154,107 +120,94 @@ help="Path to config", ) def main( - rag: bool = False, - deploy: bool = False, - evaluation: bool = False, - query: Optional[str] = None, + pipeline: str, + query_text: Optional[str] = None, model: str = OPENAI_MODEL, no_cache: bool = False, - synthetic: bool = False, - embeddings: bool = False, - argilla: bool = False, - reranked: bool = False, - chunks: bool = False, - config: str = None, + use_argilla: bool = False, + use_reranker: bool = False, + config: Optional[str] = None, ): """Main entry point for the pipeline execution. Args: - rag (bool): If `True`, the basic RAG pipeline will be run. - deploy (bool): If `True`, a Gradio app will be deployed to serve the RAG functionality. - evaluation (bool): If `True`, the evaluation pipeline will be run. - query (Optional[str]): If provided, the RAG model will be queried with this string. - model (str): The model to use for the completion. Default is OPENAI_MODEL. - no_cache (bool): If `True`, cache will be disabled. - synthetic (bool): If `True`, the synthetic data pipeline will be run. - embeddings (bool): If `True`, the embeddings will be fine-tuned. - argilla (bool): If `True`, the Argilla annotations will be used. - chunks (bool): If `True`, the chunks pipeline will be run. - reranked (bool): If `True`, rerankers will be used - config (str): Path to config + pipeline (str): The pipeline to execute (rag, deploy, evaluation, etc.) 
+ query_text (Optional[str]): Query text when using 'query' command + model (str): The model to use for the completion + no_cache (bool): If True, cache will be disabled + use_argilla (bool): If True, Argilla annotations will be used + use_reranker (bool): If True, rerankers will be used + config (Optional[str]): Path to config file """ pipeline_args = {"enable_cache": not no_cache} embeddings_finetune_args = { "enable_cache": not no_cache, "steps": { "prepare_load_data": { - "parameters": {"use_argilla_annotations": argilla} + "parameters": {"use_argilla_annotations": use_argilla} } }, } - if query: + # Handle config path + config_path = None + if config: + config_path = Path(__file__).parent / "configs" / config + + # Set default config paths based on pipeline + if not config_path: + config_mapping = { + "rag": "dev/rag.yaml", + "evaluation": "dev/rag_eval.yaml", + "synthetic": "dev/synthetic.yaml", + "embeddings": "dev/embeddings.yaml", + } + if pipeline in config_mapping: + config_path = ( + Path(__file__).parent / "configs" / config_mapping[pipeline] + ) + + # Execute query + if pipeline == "query": + if not query_text: + raise click.UsageError( + "--query-text is required when using 'query' command" + ) response = process_input_with_retrieval( - query, model=model, use_reranking=reranked + query_text, model=model, use_reranking=use_reranker ) - - # print rich markdown to the console console = Console() md = Markdown(response) console.print(md) + return - config_path = None - if config: - config_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "configs", - config, - ) - - if rag: - if not config_path: - config_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "configs", - "rag_local_dev.yaml", - ) + # Execute the appropriate pipeline + if pipeline == "rag": llm_basic_rag.with_options(config_path=config_path, **pipeline_args)() - if deploy: + # Also deploy if config is provided + if config: rag_deployment.with_options( 
config_path=config_path, **pipeline_args )() - if deploy: + + elif pipeline == "deploy": rag_deployment.with_options(**pipeline_args)() - if evaluation: - if not config_path: - config_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "configs", - "rag_eval.yaml", - ) + + elif pipeline == "evaluation": pipeline_args["enable_cache"] = False llm_eval.with_options(config_path=config_path)() - if synthetic: - if not config_path: - config_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "configs", - "synthetic.yaml", - ) + + elif pipeline == "synthetic": generate_synthetic_data.with_options( config_path=config_path, **pipeline_args )() - if embeddings: - if not config_path: - config_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "configs", - "embeddings.yaml", - ) + + elif pipeline == "embeddings": finetune_embeddings.with_options( config_path=config_path, **embeddings_finetune_args )() - if chunks: + + elif pipeline == "chunks": generate_chunk_questions.with_options(**pipeline_args)() diff --git a/llm-complete-guide/steps/eval_visualisation.py b/llm-complete-guide/steps/eval_visualisation.py index 4b7b004b..badd62c1 100644 --- a/llm-complete-guide/steps/eval_visualisation.py +++ b/llm-complete-guide/steps/eval_visualisation.py @@ -65,7 +65,7 @@ def create_image( fontweight="bold", ) else: - bar_color = colors[i] if alternate_colours else "blue" + colors[i] if alternate_colours else "blue" text_color = "white" ax.text( v diff --git a/llm-complete-guide/steps/rag_deployment.py b/llm-complete-guide/steps/rag_deployment.py index a750dde6..99a8c911 100644 --- a/llm-complete-guide/steps/rag_deployment.py +++ b/llm-complete-guide/steps/rag_deployment.py @@ -2,6 +2,8 @@ import webbrowser from huggingface_hub import HfApi + +from utils.hf_utils import get_hf_token from utils.llm_utils import process_input_with_retrieval from zenml import step from zenml.client import Client @@ -9,9 +11,8 @@ secret = 
Client().get_secret("llm-complete") -ZENML_API_TOKEN = secret.secret_values["zenml_api_token"] -ZENML_STORE_URL = secret.secret_values["zenml_store_url"] -HF_TOKEN = os.getenv("HF_TOKEN") +ZENML_API_TOKEN = os.environ.get("ZENML_API_TOKEN") +ZENML_STORE_URL = os.environ.get("ZENML_STORE_URL") SPACE_USERNAME = os.environ.get("ZENML_HF_USERNAME", "zenml") SPACE_NAME = os.environ.get("ZENML_HF_SPACE_NAME", "llm-complete-guide-rag") @@ -50,7 +51,7 @@ def predict(message, history): def upload_files_to_repo( - api, repo_id: str, files_mapping: dict, token: str = HF_TOKEN + api, repo_id: str, files_mapping: dict, token: str ): """Upload multiple files to a Hugging Face repository @@ -89,7 +90,7 @@ def gradio_rag_deployment() -> None: space_sdk="gradio", private=True, exist_ok=True, - token=HF_TOKEN, + token=get_hf_token(), ) api.add_space_secret( repo_id=hf_repo_id, @@ -112,6 +113,6 @@ def gradio_rag_deployment() -> None: hf_repo_requirements: "requirements.txt", } - upload_files_to_repo(api, hf_repo_id, files_to_upload, HF_TOKEN) + upload_files_to_repo(api, hf_repo_id, files_to_upload, get_hf_token()) webbrowser.open(f"https://huggingface.co/spaces/{hf_repo_id}") diff --git a/llm-complete-guide/steps/url_scraper.py b/llm-complete-guide/steps/url_scraper.py index f7910e26..e2d85df5 100644 --- a/llm-complete-guide/steps/url_scraper.py +++ b/llm-complete-guide/steps/url_scraper.py @@ -21,11 +21,12 @@ from steps.url_scraping_utils import get_all_pages -@step(enable_cache=True) +@step def url_scraper( docs_url: str = "https://docs.zenml.io", repo_url: str = "https://github.com/zenml-io/zenml", website_url: str = "https://zenml.io", + use_dev_set: bool = False ) -> Annotated[str, ArtifactConfig(name="urls")]: """Generates a list of relevant URLs to scrape. 
@@ -39,18 +40,19 @@ def url_scraper( """ # We comment this out to make this pipeline faster # examples_readme_urls = get_nested_readme_urls(repo_url) - docs_urls = get_all_pages(docs_url) + if use_dev_set: - # FOR TESTING ONLY - # docs_urls = [ - # "https://docs.zenml.io/getting-started/system-architectures", - # "https://docs.zenml.io/getting-started/core-concepts", - # "https://docs.zenml.io/user-guide/llmops-guide/rag-with-zenml/rag-85-loc", - # "https://docs.zenml.io/how-to/track-metrics-metadata/logging-metadata", - # "https://docs.zenml.io/how-to/debug-and-solve-issues", - # "https://docs.zenml.io/stack-components/step-operators/azureml", - # "https://docs.zenml.io/how-to/interact-with-secrets", - # ] + docs_urls = [ + "https://docs.zenml.io/getting-started/system-architectures", + "https://docs.zenml.io/getting-started/core-concepts", + "https://docs.zenml.io/user-guide/llmops-guide/rag-with-zenml/rag-85-loc", + "https://docs.zenml.io/how-to/track-metrics-metadata/logging-metadata", + "https://docs.zenml.io/how-to/debug-and-solve-issues", + "https://docs.zenml.io/stack-components/step-operators/azureml", + "https://docs.zenml.io/how-to/interact-with-secrets", + ] + else: + docs_urls = get_all_pages(docs_url) # website_urls = get_all_pages(website_url) # all_urls = docs_urls + website_urls + examples_readme_urls diff --git a/llm-complete-guide/utils/hf_utils.py b/llm-complete-guide/utils/hf_utils.py new file mode 100644 index 00000000..2de954fa --- /dev/null +++ b/llm-complete-guide/utils/hf_utils.py @@ -0,0 +1,8 @@ +from constants import SECRET_NAME +from zenml.client import Client + + +def get_hf_token() -> str: + api_key = Client().get_secret(SECRET_NAME).secret_values["hf_token"] + + return api_key diff --git a/llm-complete-guide/utils/openai_utils.py b/llm-complete-guide/utils/openai_utils.py index e67ba5f9..15b84cc5 100644 --- a/llm-complete-guide/utils/openai_utils.py +++ b/llm-complete-guide/utils/openai_utils.py @@ -2,7 +2,7 @@ from zenml.client 
import Client -def get_openai_api_key(): +def get_openai_api_key() -> str: api_key = Client().get_secret(SECRET_NAME).secret_values["openai_api_key"] return api_key