Skip to content

Commit 3245cc8

Browse files
authored
refactor: update embedding inference to use URL and token directly (#711)
* refactor: update embedding inference to use URL and token directly
  - Modified the embedding generation functions to accept HF_IE_URL instead of name and namespace.
  - Updated the populate_search_engine workflow to include HF_IE_URL as an environment variable.
  - Adjusted command-line arguments in embeddings.py to reflect the new URL-based approach for Hugging Face inference.
* refactor: simplify function definition for embedding inference
  - Removed unnecessary line breaks in the call_embedding_inference function definition for improved readability.
1 parent 99c2aee commit 3245cc8

File tree

3 files changed

+20
-36
lines changed

3 files changed

+20
-36
lines changed

.github/workflows/populate_search_engine.yml

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -31,9 +31,10 @@ jobs:
3131

3232
- name: Populate search engine from HF doc-build dataset
3333
env:
34+
HF_IE_URL: ${{ secrets.HF_IE_URL }}
3435
HF_IE_TOKEN: ${{ secrets.HF_IE_TOKEN }}
3536
MEILISEARCH_KEY: ${{ secrets.MEILISEARCH_KEY }}
36-
run: uv run doc-builder populate-search-engine --hf_ie_name embeddinggemma-300m-iyi --hf_ie_namespace huggingface
37+
run: uv run doc-builder populate-search-engine
3738

3839
gradio-job:
3940
runs-on: ubuntu-latest

src/doc_builder/build_embeddings.py

Lines changed: 7 additions & 22 deletions
Original file line number · Diff line number · Diff line change
@@ -22,7 +22,7 @@
2222

2323
import meilisearch
2424
import requests
25-
from huggingface_hub import get_inference_endpoint
25+
from huggingface_hub import InferenceClient
2626
from tqdm import tqdm
2727

2828
from .autodoc import autodoc_markdown, resolve_links_in_text
@@ -738,22 +738,14 @@ def chunks_to_embeddings(client, chunks, is_python_module) -> list[Embedding]:
738738
return embeddings
739739

740740

741-
def call_embedding_inference(
742-
chunks: list[Chunk], hf_ie_name, hf_ie_namespace, hf_ie_token, is_python_module
743-
) -> list[Embedding]:
741+
def call_embedding_inference(chunks: list[Chunk], hf_ie_url, hf_ie_token, is_python_module) -> list[Embedding]:
744742
"""
745743
Using https://huggingface.co/inference-endpoints with a text embedding model
746744
"""
747745
batch_size = 20
748746
embeddings = []
749747

750-
endpoint = get_inference_endpoint(name=hf_ie_name, namespace=hf_ie_namespace, token=hf_ie_token)
751-
if endpoint.status != "running":
752-
print("[inference endpoint] restarting...")
753-
endpoint.resume().wait()
754-
print("[inference endpoint] restarted")
755-
756-
client = endpoint.client
748+
client = InferenceClient(base_url=hf_ie_url, token=hf_ie_token)
757749

758750
with ThreadPoolExecutor(max_workers=16) as executor:
759751
future_to_chunk = {
@@ -775,8 +767,7 @@ def call_embedding_inference(
775767
def build_embeddings(
776768
package_name,
777769
doc_folder,
778-
hf_ie_name,
779-
hf_ie_namespace,
770+
hf_ie_url,
780771
hf_ie_token,
781772
meilisearch_key,
782773
version="main",
@@ -830,7 +821,7 @@ def build_embeddings(
830821
)
831822

832823
# Step 2: create embeddings
833-
embeddings = call_embedding_inference(chunks, hf_ie_name, hf_ie_namespace, hf_ie_token, is_python_module)
824+
embeddings = call_embedding_inference(chunks, hf_ie_url, hf_ie_token, is_python_module)
834825

835826
# Step 3: push embeddings to vector database (meilisearch)
836827
client = meilisearch.Client("https://edge.meilisearch.com", meilisearch_key)
@@ -852,7 +843,7 @@ def clean_meilisearch(meilisearch_key: str, swap: bool):
852843
print("[meilisearch] successfully swapped & deleted temp index.")
853844

854845

855-
def add_gradio_docs(hf_ie_name: str, hf_ie_namespace: str, hf_ie_token: str, meilisearch_key: str):
846+
def add_gradio_docs(hf_ie_url: str, hf_ie_token: str, meilisearch_key: str):
856847
"""Add Gradio documentation to embeddings."""
857848
# Step 1: download the documentation
858849
url = "https://huggingface.co/datasets/gradio/docs/resolve/main/docs.json"
@@ -877,13 +868,7 @@ def add_gradio_docs(hf_ie_name: str, hf_ie_namespace: str, hf_ie_token: str, mei
877868
batch_size = 20
878869
embeddings = []
879870

880-
endpoint = get_inference_endpoint(name=hf_ie_name, namespace=hf_ie_namespace, token=hf_ie_token)
881-
if endpoint.status != "running":
882-
print("[inference endpoint] restarting...")
883-
endpoint.resume().wait()
884-
print("[inference endpoint] restarted")
885-
886-
client = endpoint.client
871+
client = InferenceClient(base_url=hf_ie_url, token=hf_ie_token)
887872

888873
with ThreadPoolExecutor(max_workers=16) as executor:
889874
future_to_chunk = {

src/doc_builder/commands/embeddings.py

Lines changed: 11 additions & 13 deletions
Original file line number · Diff line number · Diff line change
@@ -52,9 +52,12 @@ def process_hf_docs_command(args):
5252
# If embeddings are requested
5353
if not args.skip_embeddings:
5454
# Get credentials from args or environment variables
55+
hf_ie_url = get_credential(args.hf_ie_url, "HF_IE_URL")
5556
hf_ie_token = get_credential(args.hf_ie_token, "HF_IE_TOKEN")
5657
meilisearch_key = get_credential(args.meilisearch_key, "MEILISEARCH_KEY")
5758

59+
if not hf_ie_url:
60+
raise ValueError("HF_IE_URL is required. Set via --hf_ie_url or HF_IE_URL env var.")
5861
if not hf_ie_token:
5962
raise ValueError("HF_IE_TOKEN is required. Set via --hf_ie_token or HF_IE_TOKEN env var.")
6063
if not meilisearch_key:
@@ -76,8 +79,7 @@ def process_hf_docs_command(args):
7679

7780
embeddings = call_embedding_inference(
7881
all_chunks,
79-
args.hf_ie_name,
80-
args.hf_ie_namespace,
82+
hf_ie_url,
8183
hf_ie_token,
8284
is_python_module=False, # Pre-built docs are not Python modules
8385
)
@@ -111,14 +113,17 @@ def meilisearch_clean_command(args):
111113
def add_gradio_docs_command(args):
112114
"""Wrapper for add_gradio_docs that supports environment variables."""
113115
hf_ie_token = get_credential(args.hf_ie_token, "HF_IE_TOKEN")
116+
hf_ie_url = get_credential(args.hf_ie_url, "HF_IE_URL")
114117
meilisearch_key = get_credential(args.meilisearch_key, "MEILISEARCH_KEY")
115118

116119
if not hf_ie_token:
117120
raise ValueError("HF_IE_TOKEN is required. Set via --hf_ie_token or HF_IE_TOKEN env var.")
121+
if not hf_ie_url:
122+
raise ValueError("HF_IE_URL is required. Set via --hf_ie_url or HF_IE_URL env var.")
118123
if not meilisearch_key:
119124
raise ValueError("MEILISEARCH_KEY is required. Set via --meilisearch_key or MEILISEARCH_KEY env var.")
120125

121-
add_gradio_docs(args.hf_ie_name, args.hf_ie_namespace, hf_ie_token, meilisearch_key)
126+
add_gradio_docs(hf_ie_url, hf_ie_token, meilisearch_key)
122127

123128

124129
def embeddings_command_parser(subparsers=None):
@@ -146,9 +151,8 @@ def embeddings_command_parser(subparsers=None):
146151
"Doc Builder add-gradio-docs command. Add Gradio documentation to embeddings."
147152
)
148153

149-
parser_add_gradio_docs.add_argument("--hf_ie_name", type=str, help="Inference Endpoints name.", required=True)
150154
parser_add_gradio_docs.add_argument(
151-
"--hf_ie_namespace", type=str, help="Inference Endpoints namespace.", required=True
155+
"--hf_ie_url", type=str, help="Inference Endpoints URL (or set HF_IE_URL env var).", required=False
152156
)
153157
parser_add_gradio_docs.add_argument(
154158
"--hf_ie_token", type=str, help="Hugging Face token (or set HF_IE_TOKEN env var).", required=False
@@ -192,15 +196,9 @@ def embeddings_command_parser(subparsers=None):
192196
help="Skip embedding generation and meilisearch upload (useful for testing)",
193197
)
194198
parser_process_hf_docs.add_argument(
195-
"--hf_ie_name",
199+
"--hf_ie_url",
196200
type=str,
197-
help="Inference Endpoints name (required unless --skip-embeddings is set)",
198-
required=False,
199-
)
200-
parser_process_hf_docs.add_argument(
201-
"--hf_ie_namespace",
202-
type=str,
203-
help="Inference Endpoints namespace (required unless --skip-embeddings is set)",
201+
help="Inference Endpoints URL (or set HF_IE_URL env var, required unless --skip-embeddings is set)",
204202
required=False,
205203
)
206204
parser_process_hf_docs.add_argument(

0 commit comments

Comments (0)