Merge pull request #8910 from burtenshaw/update-hf-inference

RomneyDa · web-flow · commit 7927491e7789 · 2025-12-04T12:11:59.000-08:00
docs: update hugging face inference
diff --git a/docs/customize/model-providers/more/huggingfaceinferenceapi.mdx b/docs/customize/model-providers/more/huggingfaceinferenceapi.mdx
diff --git a/docs/customize/model-providers/overview.mdx b/docs/customize/model-providers/overview.mdx
@@ -20,6 +20,7 @@ These are the most commonly used model providers that offer a wide range of capa
 | [xAI](/customize/model-providers/more/xAI)                     | Grok models from xAI                                                            | Chat, Edit, Apply                           |
 | [Vertex AI](/customize/model-providers/top-level/vertexai)     | Google Cloud's machine learning platform                                        | Chat, Edit, Apply, Embeddings               |
 | [Inception](/customize/model-providers/top-level/inception)    | On-premises open-source model runners                                           | Chat, Edit, Apply                           |
+| [HuggingFace](/customize/model-providers/top-level/huggingfaceinference) | Platform for open source models with inference providers and endpoints           | Chat, Edit, Apply, Embeddings               |
 
 ## Additional Model Providers
 
@@ -37,7 +38,6 @@ Beyond the top-level providers, Continue supports many other options:
 | [Cohere](/customize/model-providers/more/cohere)                       | Models specialized for semantic search and text generation |
 | [NVIDIA](/customize/model-providers/more/nvidia)                       | GPU-accelerated model hosting                              |
 | [Cloudflare](/customize/model-providers/more/cloudflare)               | Edge-based AI inference services                           |
-| [HuggingFace](/customize/model-providers/more/huggingfaceinferenceapi) | Platform for open source models                            |
 
 ### Local Model Options
 
diff --git a/docs/customize/model-providers/top-level/huggingfaceinference.mdx b/docs/customize/model-providers/top-level/huggingfaceinference.mdx
@@ -0,0 +1,89 @@
+---
+title: "Hugging Face"
+---
+
+Hugging Face is the main platform for sharing open AI models. It provides inference in two ways: [Inference Providers](https://huggingface.co/docs/inference-providers/index) and [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index).
+
+## Inference Providers
+
+Inference Providers is a serverless service powered by external inference providers. Requests are routed through Hugging Face and billed per token.
+
+<Info>
+
+You can get your access token from [Hugging Face](https://huggingface.co/settings/tokens) and prioritize your [providers in settings](https://huggingface.co/settings/inference-providers/overview).
+
+</Info>
+
+<Tabs>
+  <Tab title="YAML">
+  ```yaml title="config.yaml"  
+  name: My Config
+  version: 0.0.1
+  schema: v1
+
+  models:
+    - name: deepseek
+      provider: huggingface-inference-providers
+      model: deepseek-ai/DeepSeek-V3.2-Exp
+      apiKey: <YOUR_HF_TOKEN>
+      apiBase: https://router.huggingface.co/v1
+  ```
+  </Tab>
+  <Tab title="JSON (Deprecated)">
+  ```json title="config.json"
+  {
+    "models": [
+      {
+        "title": "deepseek",
+        "provider": "huggingface-inference-providers",
+        "model": "deepseek-ai/DeepSeek-V3.2-Exp",
+        "apiKey": "<YOUR_HF_TOKEN>",
+        "apiBase": "https://router.huggingface.co/v1"
+      }
+    ]
+  }
+  ```
+  </Tab>
+</Tabs>
+
+## Inference Endpoints
+
+Inference Endpoints is a dedicated service that allows you to run your open models on dedicated hardware. It is a more advanced way to get inference from Hugging Face models, giving you more control over the whole process.
+
+<Info>
+
+Before you can use Inference Endpoints, you need to create an endpoint. You can do this by going to [Inference Endpoints](https://endpoints.huggingface.co) and clicking on "Create Endpoint".
+
+</Info>
+
+<Tabs>
+  <Tab title="YAML">
+  ```yaml title="config.yaml"
+  name: My Config
+  version: 0.0.1
+  schema: v1
+
+  models:
+    - name: deepseek
+      provider: huggingface-inference-endpoints
+      model: <ENDPOINT_ID>
+      apiKey: <YOUR_HF_TOKEN>
+      apiBase: https://<YOUR_ENDPOINT_ID>.aws.endpoints.huggingface.cloud
+  ```
+  </Tab>
+  <Tab title="JSON (Deprecated)">
+  ```json title="config.json"
+  {
+    "models": [
+      {
+        "title": "deepseek",
+        "provider": "huggingface-inference-endpoints",
+        "model": "<ENDPOINT_ID>",
+        "apiKey": "<YOUR_HF_TOKEN>",
+        "apiBase": "https://<YOUR_ENDPOINT_ID>.aws.endpoints.huggingface.cloud"
+      }
+    ]
+  }
+  ```
+  </Tab>
+</Tabs>
diff --git a/docs/docs.json b/docs/docs.json
@@ -184,6 +184,7 @@
                       "customize/model-providers/top-level/azure",
                       "customize/model-providers/top-level/bedrock",
                       "customize/model-providers/top-level/gemini",
+                      "customize/model-providers/top-level/huggingfaceinference",
                       "customize/model-providers/top-level/inception",
                       "customize/model-providers/top-level/lmstudio",
                       "customize/model-providers/top-level/ollama",
@@ -200,7 +201,6 @@
                       "customize/model-providers/more/deepseek",
                       "customize/model-providers/more/deepinfra",
                       "customize/model-providers/more/groq",
-                      "customize/model-providers/more/huggingfaceinferenceapi",
                       "customize/model-providers/more/llamacpp",
                       "customize/model-providers/more/llamastack",
                       "customize/model-providers/more/mistral",