Skip to content

Commit 3245cc8

Browse files
authored
refactor: update embedding inference to use URL and token directly (#711)
* refactor: update embedding inference to use URL and token directly
  - Modified the embedding generation functions to accept HF_IE_URL instead of name and namespace.
  - Updated the populate_search_engine workflow to include HF_IE_URL as an environment variable.
  - Adjusted command-line arguments in embeddings.py to reflect the new URL-based approach for Hugging Face inference.
* refactor: simplify function definition for embedding inference
  - Removed unnecessary line breaks in the call_embedding_inference function definition for improved readability.
1 parent 99c2aee commit 3245cc8

File tree

3 files changed

+20
-36
lines changed

3 files changed

+20
-36
lines changed

.github/workflows/populate_search_engine.yml

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -31,9 +31,10 @@ jobs:
3131

3232
- name: Populate search engine from HF doc-build dataset
3333
env:
34+
HF_IE_URL: ${{ secrets.HF_IE_URL }}
3435
HF_IE_TOKEN: ${{ secrets.HF_IE_TOKEN }}
3536
MEILISEARCH_KEY: ${{ secrets.MEILISEARCH_KEY }}
36-
run: uv run doc-builder populate-search-engine --hf_ie_name embeddinggemma-300m-iyi --hf_ie_namespace huggingface
37+
run: uv run doc-builder populate-search-engine
3738

3839
gradio-job:
3940
runs-on: ubuntu-latest

src/doc_builder/build_embeddings.py

Lines changed: 7 additions & 22 deletions
Original file line number · Diff line number · Diff line change
@@ -22,7 +22,7 @@
2222

2323
import meilisearch
2424
import requests
25-
from huggingface_hub import get_inference_endpoint
25+
from huggingface_hub import InferenceClient
2626
from tqdm import tqdm
2727

2828
from .autodoc import autodoc_markdown, resolve_links_in_text
@@ -738,22 +738,14 @@ def chunks_to_embeddings(client, chunks, is_python_module) -> list[Embedding]:
738738
return embeddings
739739

740740

741-
def call_embedding_inference(
742-
chunks: list[Chunk], hf_ie_name, hf_ie_namespace, hf_ie_token, is_python_module
743-
) -> list[Embedding]:
741+
def call_embedding_inference(chunks: list[Chunk], hf_ie_url, hf_ie_token, is_python_module) -> list[Embedding]:
744742
"""
745743
Using https://huggingface.co/inference-endpoints with a text embedding model
746744
"""
747745
batch_size = 20
748746
embeddings = []
749747

750-
endpoint = get_inference_endpoint(name=hf_ie_name, namespace=hf_ie_namespace, token=hf_ie_token)
751-
if endpoint.status != "running":
752-
print("[inference endpoint] restarting...")
753-
endpoint.resume().wait()
754-
print("[inference endpoint] restarted")
755-
756-
client = endpoint.client
748+
client = InferenceClient(base_url=hf_ie_url, token=hf_ie_token)
757749

758750
with ThreadPoolExecutor(max_workers=16) as executor:
759751
future_to_chunk = {
@@ -775,8 +767,7 @@ def call_embedding_inference(
775767
def build_embeddings(
776768
package_name,
777769
doc_folder,
778-
hf_ie_name,
779-
hf_ie_namespace,
770+
hf_ie_url,
780771
hf_ie_token,
781772
meilisearch_key,
782773
version="main",
@@ -830,7 +821,7 @@ def build_embeddings(
830821
)
831822

832823
# Step 2: create embeddings
833-
embeddings = call_embedding_inference(chunks, hf_ie_name, hf_ie_namespace, hf_ie_token, is_python_module)
824+
embeddings = call_embedding_inference(chunks, hf_ie_url, hf_ie_token, is_python_module)
834825

835826
# Step 3: push embeddings to vector database (meilisearch)
836827
client = meilisearch.Client("https://edge.meilisearch.com", meilisearch_key)
@@ -852,7 +843,7 @@ def clean_meilisearch(meilisearch_key: str, swap: bool):
852843
print("[meilisearch] successfully swapped & deleted temp index.")
853844

854845

855-
def add_gradio_docs(hf_ie_name: str, hf_ie_namespace: str, hf_ie_token: str, meilisearch_key: str):
846+
def add_gradio_docs(hf_ie_url: str, hf_ie_token: str, meilisearch_key: str):
856847
"""Add Gradio documentation to embeddings."""
857848
# Step 1: download the documentation
858849
url = "https://huggingface.co/datasets/gradio/docs/resolve/main/docs.json"
@@ -877,13 +868,7 @@ def add_gradio_docs(hf_ie_name: str, hf_ie_namespace: str, hf_ie_token: str, mei
877868
batch_size = 20
878869
embeddings = []
879870

880-
endpoint = get_inference_endpoint(name=hf_ie_name, namespace=hf_ie_namespace, token=hf_ie_token)
881-
if endpoint.status != "running":
882-
print("[inference endpoint] restarting...")
883-
endpoint.resume().wait()
884-
print("[inference endpoint] restarted")
885-
886-
client = endpoint.client
871+
client = InferenceClient(base_url=hf_ie_url, token=hf_ie_token)
887872

888873
with ThreadPoolExecutor(max_workers=16) as executor:
889874
future_to_chunk = {

src/doc_builder/commands/embeddings.py

Lines changed: 11 additions & 13 deletions
Original file line number · Diff line number · Diff line change
@@ -52,9 +52,12 @@ def process_hf_docs_command(args):
5252
# If embeddings are requested
5353
if not args.skip_embeddings:
5454
# Get credentials from args or environment variables
55+
hf_ie_url = get_credential(args.hf_ie_url, "HF_IE_URL")
5556
hf_ie_token = get_credential(args.hf_ie_token, "HF_IE_TOKEN")
5657
meilisearch_key = get_credential(args.meilisearch_key, "MEILISEARCH_KEY")
5758

59+
if not hf_ie_url:
60+
raise ValueError("HF_IE_URL is required. Set via --hf_ie_url or HF_IE_URL env var.")
5861
if not hf_ie_token:
5962
raise ValueError("HF_IE_TOKEN is required. Set via --hf_ie_token or HF_IE_TOKEN env var.")
6063
if not meilisearch_key:
@@ -76,8 +79,7 @@ def process_hf_docs_command(args):
7679

7780
embeddings = call_embedding_inference(
7881
all_chunks,
79-
args.hf_ie_name,
80-
args.hf_ie_namespace,
82+
hf_ie_url,
8183
hf_ie_token,
8284
is_python_module=False, # Pre-built docs are not Python modules
8385
)
@@ -111,14 +113,17 @@ def meilisearch_clean_command(args):
111113
def add_gradio_docs_command(args):
112114
"""Wrapper for add_gradio_docs that supports environment variables."""
113115
hf_ie_token = get_credential(args.hf_ie_token, "HF_IE_TOKEN")
116+
hf_ie_url = get_credential(args.hf_ie_url, "HF_IE_URL")
114117
meilisearch_key = get_credential(args.meilisearch_key, "MEILISEARCH_KEY")
115118

116119
if not hf_ie_token:
117120
raise ValueError("HF_IE_TOKEN is required. Set via --hf_ie_token or HF_IE_TOKEN env var.")
121+
if not hf_ie_url:
122+
raise ValueError("HF_IE_URL is required. Set via --hf_ie_url or HF_IE_URL env var.")
118123
if not meilisearch_key:
119124
raise ValueError("MEILISEARCH_KEY is required. Set via --meilisearch_key or MEILISEARCH_KEY env var.")
120125

121-
add_gradio_docs(args.hf_ie_name, args.hf_ie_namespace, hf_ie_token, meilisearch_key)
126+
add_gradio_docs(hf_ie_url, hf_ie_token, meilisearch_key)
122127

123128

124129
def embeddings_command_parser(subparsers=None):
@@ -146,9 +151,8 @@ def embeddings_command_parser(subparsers=None):
146151
"Doc Builder add-gradio-docs command. Add Gradio documentation to embeddings."
147152
)
148153

149-
parser_add_gradio_docs.add_argument("--hf_ie_name", type=str, help="Inference Endpoints name.", required=True)
150154
parser_add_gradio_docs.add_argument(
151-
"--hf_ie_namespace", type=str, help="Inference Endpoints namespace.", required=True
155+
"--hf_ie_url", type=str, help="Inference Endpoints URL (or set HF_IE_URL env var).", required=False
152156
)
153157
parser_add_gradio_docs.add_argument(
154158
"--hf_ie_token", type=str, help="Hugging Face token (or set HF_IE_TOKEN env var).", required=False
@@ -192,15 +196,9 @@ def embeddings_command_parser(subparsers=None):
192196
help="Skip embedding generation and meilisearch upload (useful for testing)",
193197
)
194198
parser_process_hf_docs.add_argument(
195-
"--hf_ie_name",
199+
"--hf_ie_url",
196200
type=str,
197-
help="Inference Endpoints name (required unless --skip-embeddings is set)",
198-
required=False,
199-
)
200-
parser_process_hf_docs.add_argument(
201-
"--hf_ie_namespace",
202-
type=str,
203-
help="Inference Endpoints namespace (required unless --skip-embeddings is set)",
201+
help="Inference Endpoints URL (or set HF_IE_URL env var, required unless --skip-embeddings is set)",
204202
required=False,
205203
)
206204
parser_process_hf_docs.add_argument(

0 commit comments

Comments (0)