huggingface · Narsil · Mar 26, 2025 · Mar 26, 2025 · Mar 26, 2025
diff --git a/README.md b/README.md
@@ -239,10 +239,10 @@ Options:
 
           [env: DEFAULT_PROMPT=]
 
-      --hf-api-token <HF_API_TOKEN>
-          Your HuggingFace hub token
+      --hf-token <HF_TOKEN>
+          Your Hugging Face Hub token
 
-          [env: HF_API_TOKEN=]
+          [env: HF_TOKEN=]
 
       --hostname <HOSTNAME>
           The IP address to listen on
@@ -332,14 +332,14 @@ at: [https://huggingface.github.io/text-embeddings-inference](https://huggingfac
 
 ### Using a private or gated model
 
-You have the option to utilize the `HF_API_TOKEN` environment variable for configuring the token employed by
+You have the option to utilize the `HF_TOKEN` environment variable for configuring the token employed by
 `text-embeddings-inference`. This allows you to gain access to protected resources.
 
 For example:
 
 1. Go to https://huggingface.co/settings/tokens
 2. Copy your cli READ token
-3. Export `HF_API_TOKEN=<your cli READ token>`
+3. Export `HF_TOKEN=<your cli READ token>`
 
 or with Docker:
 
@@ -348,7 +348,7 @@ model=<your private model>
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 token=<your cli READ token>
 
-docker run --gpus all -e HF_API_TOKEN=$token -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.6 --model-id $model
+docker run --gpus all -e HF_TOKEN=$token -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.6 --model-id $model
 ```
 
 ### Air gapped deployment

diff --git a/backends/candle/tests/snapshots/test_gte__snowflake_gte_batch.snap b/backends/candle/tests/snapshots/test_gte__snowflake_gte_batch.snap
@@ -2307,4 +2307,3 @@ expression: embeddings_batch
   - -0.16524515
   - -0.100704014
   - 0.3677737
-
diff --git a/backends/candle/tests/snapshots/test_gte__snowflake_gte_single.snap b/backends/candle/tests/snapshots/test_gte__snowflake_gte_single.snap
@@ -771,4 +771,3 @@ expression: embeddings_single
   - -0.16524515
   - -0.100704014
   - 0.3677737
-
diff --git a/docs/source/en/cli_arguments.md b/docs/source/en/cli_arguments.md
@@ -131,10 +131,10 @@ Options:
 
           [env: DEFAULT_PROMPT=]
 
-      --hf-api-token <HF_API_TOKEN>
+      --hf-token <HF_TOKEN>
           Your HuggingFace hub token
 
-          [env: HF_API_TOKEN=]
+          [env: HF_TOKEN=]
 
       --hostname <HOSTNAME>
           The IP address to listen on

diff --git a/docs/source/en/private_models.md b/docs/source/en/private_models.md
@@ -24,10 +24,10 @@ Once you have confirmed that you have access to the model:
 - Navigate to your account's [Profile | Settings | Access Tokens page](https://huggingface.co/settings/tokens).
 - Generate and copy a read token.
 
-If you're the CLI, set the `HF_API_TOKEN` environment variable. For example:
+If you're using the CLI, set the `HF_TOKEN` environment variable. For example:
 
 ```shell
-export HF_API_TOKEN=<YOUR READ TOKEN>
+export HF_TOKEN=<YOUR READ TOKEN>
 ```
 
 Alternatively, you can provide the token when deploying the model with Docker:
@@ -37,5 +37,5 @@ model=<your private model>
 volume=$PWD/data
 token=<your cli Hugging Face Hub token>
 
-docker run --gpus all -e HF_API_TOKEN=$token -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.6 --model-id $model
+docker run --gpus all -e HF_TOKEN=$token -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.6 --model-id $model
 ```
diff --git a/router/src/lib.rs b/router/src/lib.rs
@@ -54,7 +54,7 @@ pub async fn run(
     auto_truncate: bool,
     default_prompt: Option<String>,
     default_prompt_name: Option<String>,
-    hf_api_token: Option<String>,
+    hf_token: Option<String>,
     hostname: Option<String>,
     port: u16,
     uds_path: Option<String>,
@@ -70,9 +70,7 @@ pub async fn run(
         // Using a local model
         (model_id_path.to_path_buf(), None)
     } else {
-        let mut builder = ApiBuilder::new()
-            .with_progress(false)
-            .with_token(hf_api_token);
+        let mut builder = ApiBuilder::new().with_progress(false).with_token(hf_token);
 
         if let Some(cache_dir) = huggingface_hub_cache {
             builder = builder.with_cache_dir(cache_dir.into());

diff --git a/router/src/main.rs b/router/src/main.rs
@@ -106,11 +106,16 @@ struct Args {
     #[clap(long, env, conflicts_with = "default_prompt_name")]
     default_prompt: Option<String>,
 
-    /// Your HuggingFace hub token
-    #[clap(long, env)]
+    /// [DEPRECATED IN FAVOR OF `--hf-token`] Your Hugging Face Hub token
+    #[clap(long, env, hide = true)]
     #[redact(partial)]
     hf_api_token: Option<String>,
 
+    /// Your Hugging Face Hub token
+    #[clap(long, env, conflicts_with = "hf_api_token")]
+    #[redact(partial)]
+    hf_token: Option<String>,
+
     /// The IP address to listen on
     #[clap(default_value = "0.0.0.0", long, env)]
     hostname: String,
@@ -188,6 +193,13 @@ async fn main() -> Result<()> {
         }
     });
 
+    // Since `--hf-api-token` is deprecated in favor of `--hf-token`, we need to still make sure
+    // that if the user provides the token with `--hf-api-token` the token is still parsed properly
+    if args.hf_api_token.is_some() {
+        tracing::warn!("The `--hf-api-token` argument (and the `HF_API_TOKEN` env var) is deprecated and will be removed in a future version. Please use `--hf-token` (or the `HF_TOKEN` env var) instead.");
+    }
+    let token = args.hf_token.or(args.hf_api_token);
+
     text_embeddings_router::run(
         args.model_id,
         args.revision,
@@ -201,7 +213,7 @@ async fn main() -> Result<()> {
         args.auto_truncate,
         args.default_prompt,
         args.default_prompt_name,
-        args.hf_api_token,
+        token,
         Some(args.hostname),
         args.port,
         Some(args.uds_path),
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2307,4 +2307,3 @@ expression: embeddings_batch
		- -0.16524515
		- -0.100704014
		- 0.3677737
Original file line number	Diff line number	Diff line change
Expand Up		@@ -771,4 +771,3 @@ expression: embeddings_single
		- -0.16524515
		- -0.100704014
		- 0.3677737