From 9c212899c85d999ce54e157c332d9650d9b2e4a9 Mon Sep 17 00:00:00 2001
From: burtenshaw
Date: Thu, 10 Jul 2025 11:38:14 +0200
Subject: [PATCH 1/7] first draft of function calling tutorial

---
 .../guides/function-calling.md | 505 ++++++++++++++++++
 1 file changed, 505 insertions(+)
 create mode 100644 docs/inference-providers/guides/function-calling.md

diff --git a/docs/inference-providers/guides/function-calling.md b/docs/inference-providers/guides/function-calling.md
new file mode 100644
index 000000000..d8a67cc8d
--- /dev/null
+++ b/docs/inference-providers/guides/function-calling.md
@@ -0,0 +1,505 @@
# Function Calling with Inference Providers

Function calling enables language models to interact with external tools and APIs by generating structured function calls based on user input. This capability allows you to build AI agents that can perform actions like retrieving real-time data, making calculations, or interacting with external services.

When you provide a language model with function descriptions, it can decide when to call these functions based on user requests, execute them, and incorporate the results into natural language responses. For example, you can build an assistant that can fetch real-time weather data to provide responses.

<Tip>

This guide assumes you have a Hugging Face account and access token. If you don't have one, you can create a free account at [huggingface.co](https://huggingface.co) and get your token from your [settings page](https://huggingface.co/settings/tokens).

</Tip>

## Defining Functions

The first step is implementing the functions you want the model to call and defining their schemas. We'll use a simple weather function example that returns the current weather for a given location.

We can define the function as a Python function that performs a simple task. In this case, the function will return the current weather as a dictionary of location, temperature, and condition.

<hfoptions id="client-setup">
<hfoption id="openai">

```python
import json
import os
from openai import OpenAI

# Initialize client
client = OpenAI(
    base_url="https://router.huggingface.co/nebius/v1",
    api_key=os.environ["HF_TOKEN"],
)

# Define the function
def get_current_weather(location: str) -> dict:
    """Get weather information for a location."""
    # In production, this would call a real weather API
    weather_data = {
        "San Francisco": {"temperature": "22°C", "condition": "Sunny"},
        "New York": {"temperature": "18°C", "condition": "Cloudy"},
        "London": {"temperature": "15°C", "condition": "Rainy"},
    }

    return weather_data.get(location, {
        "location": location,
        "error": "Weather data not available"
    })
```

</hfoption>
<hfoption id="huggingface_hub">

```python
import json
import os
from huggingface_hub import InferenceClient

# Initialize client
client = InferenceClient(token=os.environ["HF_TOKEN"])

# Define the function
def get_current_weather(location: str) -> dict:
    """Get weather information for a location."""
    # In production, this would call a real weather API
    weather_data = {
        "San Francisco": {"temperature": "22°C", "condition": "Sunny"},
        "New York": {"temperature": "18°C", "condition": "Cloudy"},
        "London": {"temperature": "15°C", "condition": "Rainy"},
    }

    return weather_data.get(location, {
        "location": location,
        "error": "Weather data not available"
    })
```

</hfoption>
</hfoptions>

Now we need to define the function schema that describes our weather function to the language model.
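
If you would rather not write this JSON by hand, you can derive a minimal schema from the function's signature and docstring. The sketch below is only an illustration — `build_tool_schema` is a hypothetical helper, not part of `openai` or `huggingface_hub` — and it assumes your parameters use simple built-in types:

```python
import inspect
from typing import get_type_hints

def build_tool_schema(func) -> dict:
    """Derive a minimal tool schema from a function's signature and docstring."""
    type_map = {str: "string", int: "integer", float: "number", bool: "boolean"}
    hints = get_type_hints(func)
    properties, required = {}, []
    for name, param in inspect.signature(func).parameters.items():
        # Map the Python type hint to a JSON Schema type, defaulting to string
        properties[name] = {"type": type_map.get(hints.get(name), "string"), "description": name}
        if param.default is inspect.Parameter.empty:
            required.append(name)
    return {
        "type": "function",
        "function": {
            "name": func.__name__,
            "description": (func.__doc__ or "").strip(),
            "parameters": {"type": "object", "properties": properties, "required": required},
        },
    }

tools = [build_tool_schema(get_current_weather)]
```

For the rest of this guide we write the schema out explicitly, which makes it easier to see exactly what the model receives.
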
This schema tells the model what parameters the function expects and what it does:

<hfoptions id="tool-schema">
<hfoption id="openai">

```python

# Define the function schema
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name"
                    }
                },
                "required": ["location"],
            },
        },
    }
]
```

</hfoption>
<hfoption id="huggingface_hub">

```python

# Define the function schema
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name"
                    }
                },
                "required": ["location"],
            },
        },
    }
]
```

</hfoption>
</hfoptions>

The schema is a JSON Schema format that describes what the function does, its parameters, and which parameters are required. The description helps the model understand when to call the function and how to call it.

## Handling Functions in Chats

Once you've defined your functions, you can include them in chat completions. The model will decide when to call them based on user input:

Functions work within typical chat completion conversations. The model will decide when to call them based on user input.

```python
user_message = "What's the weather like in San Francisco?"

messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant with access to weather data."
    },
    {"role": "user", "content": user_message}
]

# Initial API call with tools
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-0528",
    messages=messages,
    tools=tools,
    tool_choice="auto"  # Let the model decide when to call functions
)

response_message = response.choices[0].message
```

<Tip>

The `tool_choice` parameter is used to control when the model calls functions. In this case, we're using `auto`, which means the model will decide when to call functions (0 or more times). Below we'll expand on `tool_choice` and other parameters.

</Tip>

Next, we need to check if the model wants to call functions. If it does, we need to execute the function and add the result to the conversation.

```python

# Check if model wants to call functions
if response_message.tool_calls:
    # Add assistant's response to messages
    messages.append(response_message)

    # Process each tool call
    for tool_call in response_message.tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)

        # Execute the function
        if function_name == "get_current_weather":
            result = get_current_weather(function_args["location"])

            # Add function result to messages
            messages.append({
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": json.dumps(result),
            })

    # Get final response with function results
    final_response = client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        messages=messages,
    )

    return final_response.choices[0].message.content
else:
    return response_message.content

```

The workflow is straightforward: make an initial API call with your tools, check if the model wants to call functions, execute them if needed, add the results to the conversation, and get the final response.

<Tip>

We have handled that the model wants to call functions and that it is calling a function that exists. Models can call functions that don't exist, so we need to handle that case. We can also deal with this using `strict` mode, which we'll cover later.

</Tip>
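
Until we get to strict mode, a simple way to handle an unexpected tool call is to send an error payload back as the tool result so the model can correct itself on the next turn. This is a minimal sketch that follows the same message format used above; the shape of the error payload is an assumption, not a fixed API:

```python
# Guard the dispatch step: only execute tools we actually defined
for tool_call in response_message.tool_calls:
    function_name = tool_call.function.name

    if function_name == "get_current_weather":
        function_args = json.loads(tool_call.function.arguments)
        result = get_current_weather(function_args["location"])
    else:
        # Report the problem back to the model instead of raising an error
        result = {"error": f"Unknown function: {function_name}"}

    # Feed the outcome (success or error) back to the model as a tool message
    messages.append({
        "tool_call_id": tool_call.id,
        "role": "tool",
        "name": function_name,
        "content": json.dumps(result),
    })
```
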
## Multiple Functions

You can define multiple functions for more complex assistants:

```python
# Define multiple functions
def get_current_weather(location: str) -> dict:
    """Get current weather for a location."""
    return {"location": location, "temperature": "22°C", "condition": "Sunny"}

def get_weather_forecast(location: str, date: str) -> dict:
    """Get weather forecast for a location."""
    return {
        "location": location,
        "date": date,
        "forecast": "Sunny with chance of rain",
        "temperature": "20°C"
    }

# Function registry
AVAILABLE_FUNCTIONS = {
    "get_current_weather": get_current_weather,
    "get_weather_forecast": get_weather_forecast,
}

# Multiple tool schemas
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name"}
                },
                "required": ["location"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_weather_forecast",
            "description": "Get weather forecast for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name"},
                    "date": {"type": "string", "description": "Date in YYYY-MM-DD format"},
                },
                "required": ["location", "date"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-0528",
    messages=messages,
    tools=tools,
    tool_choice="auto"
)

```

We have defined multiple functions and added them to the tools list. The model will decide when to call them based on user input. We can also use the `tool_choice` parameter to force the model to call a specific function.

We can handle the tool executions in a similar way to the single function example. This time using an `elif` statement to handle the different functions.

```python
# execute the response
response_message = response.choices[0].message

# check if the model wants to call functions
if response_message.tool_calls:
    # process the tool calls
    for tool_call in response_message.tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)

        # execute the function
        if function_name == "get_current_weather":
            result = get_current_weather(function_args["location"])
        elif function_name == "get_weather_forecast":
            result = get_weather_forecast(function_args["location"], function_args["date"])

        # add the result to the conversation
        messages.append({
            "tool_call_id": tool_call.id,
            "role": "tool",
            "name": function_name,
            "content": json.dumps(result),
        })

    # get the final response with function results
    final_response = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1-0528",
        messages=messages,
    )

    return final_response.choices[0].message.content
else:
    return response_message.content

```

🎉 You've built a functional assistant that can call multiple functions to get weather data!
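
As the number of tools grows, the `if`/`elif` chain becomes harder to maintain. One option is to dispatch through the `AVAILABLE_FUNCTIONS` registry defined above, which also covers the unknown-function case from the earlier sketch. This assumes every registered function accepts the arguments named in its schema as keyword arguments, and the error payload is again illustrative:

```python
# Dispatch through the registry instead of an if/elif chain
for tool_call in response_message.tool_calls:
    function_name = tool_call.function.name
    function_to_call = AVAILABLE_FUNCTIONS.get(function_name)

    if function_to_call is None:
        result = {"error": f"Unknown function: {function_name}"}
    else:
        function_args = json.loads(tool_call.function.arguments)
        result = function_to_call(**function_args)

    messages.append({
        "tool_call_id": tool_call.id,
        "role": "tool",
        "name": function_name,
        "content": json.dumps(result),
    })
```

With this pattern, adding a new tool only means defining the function, registering it, and appending its schema to `tools`.
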
## Additional Configuration

Let's look at some additional configuration options for function calling with Inference Providers, to make the most of the capabilities.

### Provider Selection

You can specify which inference provider to use for more control over performance and cost. For function calling, this also helps reduce variance in the model's responses, since each provider serves its own configuration of the model.

<hfoptions id="provider-selection">
<hfoption id="openai">

In the OpenAI client, you can specify the provider you want to use for the request by setting the `base_url` parameter.

```python
# The OpenAI client automatically routes through Inference Providers
# You can specify provider preferences in your HF settings
client = OpenAI(
-    base_url="https://router.huggingface.co/together/v1",
+    base_url="https://router.huggingface.co/nebius/v1",
    api_key=os.environ["HF_TOKEN"],
)

```

</hfoption>
<hfoption id="huggingface_hub">

In the Hugging Face Hub client, you can specify the provider you want to use for the request by setting the `provider` parameter.

```python
# Specify a provider directly
client = InferenceClient(
    token=os.environ["HF_TOKEN"]
-    provider="together" # Override client provider
+    provider="nebius" # Override client provider
)

```

</hfoption>
</hfoptions>

By switching provider, you can see the model's response change because each provider uses a different configuration of the model.

<Tip>

Each inference provider has different capabilities and performance characteristics. You can find more information about each provider in the [Inference Providers](/inference-providers/providers) section.

</Tip>

### Tool Choice Options

You can control when and which functions are called using the `tool_choice` parameter in the OpenAI client.

The `tool_choice` parameter is used to control when the model calls functions. In most cases, we're using `auto`, which means the model will decide when to call functions (0 or more times).

```python
# Let the model decide (default)
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-0528",
    messages=messages,
    tools=tools,
    tool_choice="auto"  # Model decides when to call functions
)
```

However, in some use cases, you may want to force the model to call a function so that it never replies based on its own knowledge, but only based on the function call results.

```python
# Force the model to call at least one function
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-0528",
    messages=messages,
    tools=tools,
    tool_choice="required"  # Must call at least one function
)
```

This works well if you have simple functions, but if you have more complex functions, you may want to use the `tool_choice` parameter to force the model to call a specific function at least once.

For example, let's say you assistant's only job is to give the weather for a given location. You may want to force the model to call the `get_current_weather` function, and not call any other functions.

```python
# Force a specific function call
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-0528",
    messages=messages,
    tools=tools,
    tool_choice={
        "type": "function",
        "function": {"name": "get_current_weather"}
    }
)
```

Here, we're forcing the model to call the `get_current_weather` function, and not call any other functions.

### Strict Mode

Use strict mode to ensure function calls follow your schema exactly. This is useful to prevent the model from calling functions with unexpected arguments, or calling functions that don't exist.

```python
# Define tools with strict mode
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name"},
                },
                "required": ["location"],
+                "additionalProperties": False,  # Strict mode requirement
            },
+            "strict": True,  # Enable strict mode
        },
    }
]
```

Strict mode ensures that function arguments match your schema exactly: no additional properties are allowed, all required parameters must be provided, and data types are strictly enforced.

<Tip>

Strict mode is not supported by all providers. You can check the provider's documentation to see if it supports strict mode.

</Tip>

### Streaming Responses

Enable streaming for real-time responses with function calls. This is useful to show the model's progress to the user, or to handle long-running function calls more efficiently.

```python
# Enable streaming with function calls
stream = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-0528",
    messages=messages,
    tools=tools,
    tool_choice="auto",
    stream=True  # Enable streaming
)

# Process the stream
for chunk in stream:
    if chunk.choices[0].delta.tool_calls:
        # Handle tool call chunks
        tool_calls = chunk.choices[0].delta.tool_calls

    if chunk.choices[0].delta.content:
        # Handle content chunks
        content = chunk.choices[0].delta.content
```

Streaming allows you to process responses as they arrive, show real-time progress to users, and handle long-running function calls more efficiently.

<Tip>

Streaming is not supported by all providers. You can check the provider's documentation to see if it supports streaming.

</Tip>

# Next Steps

Now that you've seen how to use function calling with Inference Providers, you can start building your own assistants! Why not try out some of these ideas:

- Try smaller models for faster responses and lower costs
- Build an agent that can fetch real-time data
- Use a reasoning model to build an agent that can reason with external tools


From 9936c32b7a73daefaff344dacadd693ec442107c Mon Sep 17 00:00:00 2001
From: burtenshaw
Date: Thu, 10 Jul 2025 11:42:41 +0200
Subject: [PATCH 2/7] split hf client and openai for complex tool choice mode

---
 .../guides/function-calling.md | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/docs/inference-providers/guides/function-calling.md b/docs/inference-providers/guides/function-calling.md
index d8a67cc8d..8c43eb7ab 100644
--- a/docs/inference-providers/guides/function-calling.md
+++ b/docs/inference-providers/guides/function-calling.md
@@ -383,7 +383,7 @@ Each inference provider has different capabilities and performance characteristi
 
 ### Tool Choice Options
 
-You can control when and which functions are called using the `tool_choice` parameter in the OpenAI client.
+You can control when and which functions are called using the `tool_choice` parameter.
 
 The `tool_choice` parameter is used to control when the model calls functions. In most cases, we're using `auto`, which means the model will decide when to call functions (0 or more times).
 
@@ -411,6 +411,10 @@ response = client.chat.completions.create(
 
 This works well if you have simple functions, but if you have more complex functions, you may want to use the `tool_choice` parameter to force the model to call a specific function at least once.
+ + + + For example, let's say you assistant's only job is to give the weather for a given location. You may want to force the model to call the `get_current_weather` function, and not call any other functions. ```python @@ -428,6 +432,22 @@ response = client.chat.completions.create( Here, we're forcing the model to call the `get_current_weather` function, and not call any other functions. + + + + + + + + +Hugging Face Hub does not support the `tool_choice` parameters that specify which function to call. + + + + + + + ### Strict Mode Use strict mode to ensure function calls follow your schema exactly. This is useful to prevent the model from calling functions with unexpected arguments, or calling functions that don't exist. From ee02e7c69aff3ba407b20f0fa2091c4e4d758986 Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Thu, 10 Jul 2025 11:43:10 +0200 Subject: [PATCH 3/7] add guide to toc --- docs/inference-providers/_toctree.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/inference-providers/_toctree.yml b/docs/inference-providers/_toctree.yml index 7b9093114..724419aee 100644 --- a/docs/inference-providers/_toctree.yml +++ b/docs/inference-providers/_toctree.yml @@ -17,6 +17,8 @@ title: Your First API Call - local: guides/building-first-app title: Building Your First AI App + - local: guides/function-calling + title: Function Calling - title: Providers sections: From 49fa469d448b9b8a4438751b6cba18a1c03527dd Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Thu, 10 Jul 2025 12:02:16 +0200 Subject: [PATCH 4/7] drop pointless hfoption --- .../guides/function-calling.md | 68 ++++--------------- 1 file changed, 15 insertions(+), 53 deletions(-) diff --git a/docs/inference-providers/guides/function-calling.md b/docs/inference-providers/guides/function-calling.md index 8c43eb7ab..8b69bf0d5 100644 --- a/docs/inference-providers/guides/function-calling.md +++ b/docs/inference-providers/guides/function-calling.md @@ -14,14 +14,17 @@ This guide assumes you have a Hugging Face account and access token. If you don' The first step is implementing the functions you want the model to call and defining their schemas. We'll use a simple weather function example that returns the current weather for a given location. -We can define the function as a Python function that performs a simple task. In this case, the function will return the current weather as a dictionary of location, temperature, and condition. +As always, we'll start by initializing the client for our inference client. +In the OpenAI client, we'll use the `base_url` parameter to specify the provider we want to use for the request. + ```python import json import os + from openai import OpenAI # Initialize client @@ -29,35 +32,29 @@ client = OpenAI( base_url="https://router.huggingface.co/nebius/v1", api_key=os.environ["HF_TOKEN"], ) - -# Define the function -def get_current_weather(location: str) -> dict: - """Get weather information for a location.""" - # In production, this would call a real weather API - weather_data = { - "San Francisco": {"temperature": "22°C", "condition": "Sunny"}, - "New York": {"temperature": "18°C", "condition": "Cloudy"}, - "London": {"temperature": "15°C", "condition": "Rainy"}, - } - - return weather_data.get(location, { - "location": location, - "error": "Weather data not available" - }) - ``` +In the Hugging Face Hub client, we'll use the `provider` parameter to specify the provider we want to use for the request. 
+ ```python import json import os + from huggingface_hub import InferenceClient # Initialize client -client = InferenceClient(token=os.environ["HF_TOKEN"]) +client = InferenceClient(token=os.environ["HF_TOKEN"], provider="nebius") +``` + + + +We can define the function as a Python function that performs a simple task. In this case, the function will return the current weather as a dictionary of location, temperature, and condition. + +```python # Define the function def get_current_weather(location: str) -> dict: """Get weather information for a location.""" @@ -74,14 +71,9 @@ def get_current_weather(location: str) -> dict: }) ``` - - Now we need to define the function schema that describes our weather function to the language model. This schema tells the model what parameters the function expects and what it does: - - - ```python # Define the function schema @@ -106,36 +98,6 @@ tools = [ ] ``` - - - -```python - -# Define the function schema -tools = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get current weather for a location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "City name" - } - }, - "required": ["location"], - }, - }, - } -] -``` - - - - The schema is a JSON Schema format that describes what the function does, its parameters, and which parameters are required. The description helps the model understand when to call the function and how to call it. ## Handling Functions in Chats From d71c5d11178fdc7d98ce51532559ecbb8a6bdc04 Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Thu, 10 Jul 2025 12:03:23 +0200 Subject: [PATCH 5/7] remove excess hfoption markup --- docs/inference-providers/guides/function-calling.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/inference-providers/guides/function-calling.md b/docs/inference-providers/guides/function-calling.md index 8b69bf0d5..2bdb5bf37 100644 --- a/docs/inference-providers/guides/function-calling.md +++ b/docs/inference-providers/guides/function-calling.md @@ -396,8 +396,6 @@ Here, we're forcing the model to call the `get_current_weather` function, and no - - From b44f28d970d61663dfe898f5f626fbbf285e825e Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Mon, 14 Jul 2025 12:22:20 +0200 Subject: [PATCH 6/7] Apply suggestions from text review Co-authored-by: Sergio Paniego Blanco Co-authored-by: Pedro Cuenca --- .../guides/function-calling.md | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/inference-providers/guides/function-calling.md b/docs/inference-providers/guides/function-calling.md index 2bdb5bf37..71320c518 100644 --- a/docs/inference-providers/guides/function-calling.md +++ b/docs/inference-providers/guides/function-calling.md @@ -2,17 +2,17 @@ Function calling enables language models to interact with external tools and APIs by generating structured function calls based on user input. This capability allows you to build AI agents that can perform actions like retrieving real-time data, making calculations, or interacting with external services. -When you provide a language model with function descriptions, it can decide when to call these functions based on user requests, execute them, and incorporate the results into natural language responses. For example, you can build an assistant that can fetch real-time weather data to provide responses. 
+When you provide a language model that has been fine-tuned to use tools with function descriptions, it can decide when to call these functions based on user requests, execute them, and incorporate the results into natural language responses. For example, you can build an assistant that fetches real-time weather data to provide accurate responses. -This guide assumes you have a Hugging Face account and access token. If you don't have one, you can create a free account at [huggingface.co](https://huggingface.co) and get your token from your [settings page](https://huggingface.co/settings/tokens). +This guide assumes you have a Hugging Face account and access token. You can create a free account at [huggingface.co](https://huggingface.co) and get your token from your [settings page](https://huggingface.co/settings/tokens). ## Defining Functions -The first step is implementing the functions you want the model to call and defining their schemas. We'll use a simple weather function example that returns the current weather for a given location. +The first step is implementing the functions you want the model to call. We'll use a simple weather function example that returns the current weather for a given location. As always, we'll start by initializing the client for our inference client. @@ -89,12 +89,12 @@ tools = [ "location": { "type": "string", "description": "City name" - } + }, }, "required": ["location"], }, }, - } + }, ] ``` @@ -102,8 +102,6 @@ The schema is a JSON Schema format that describes what the function does, its pa ## Handling Functions in Chats -Once you've defined your functions, you can include them in chat completions. The model will decide when to call them based on user input: - Functions work within typical chat completion conversations. The model will decide when to call them based on user input. ```python @@ -134,7 +132,7 @@ The `tool_choice` parameter is used to control when the model calls functions. I -Next, we need to check if the model wants to call functions. If it does, we need to execute the function and add the result to the conversation. +Next, we need to check in the model response where the model decided to call any functions. If it did, we need to execute the function and add the result to the conversation, before we send the final response to the user. ```python @@ -172,11 +170,11 @@ else: ``` -The workflow is straightforward: make an initial API call with your tools, check if the model wants to call functions, execute them if needed, add the results to the conversation, and get the final response. +The workflow is straightforward: make an initial API call with your tools, check if the model wants to call functions, execute them if needed, add the results to the conversation, and get the final response for the user. -We have handled that the model wants to call functions and that it is calling a function that exists. Models can call functions that don't exist, so we need to handle that case. We can also deal with this using `strict` mode, which we'll cover later. +We have handled the case where the model wants to call a function and that the function actually exists. However, models might try to call functions that don’t exist, so we need to account for that as well. We can also deal with this using `strict` mode, which we'll cover later. 
@@ -235,7 +233,7 @@ tools = [ "required": ["location", "date"], }, }, - } + }, ] response = client.chat.completions.create( @@ -321,7 +319,7 @@ client = OpenAI( In the Hugging Face Hub client, you can specify the provider you want to use for the request by setting the `provider` parameter. -```python +```diff # Specify a provider directly client = InferenceClient( token=os.environ["HF_TOKEN"] @@ -339,7 +337,7 @@ By switching provider, you can see the model's response change because each prov -Each inference provider has different capabilities and performance characteristics. You can find more information about each provider in the [Inference Providers](/inference-providers/providers) section. +Each inference provider has different capabilities and performance characteristics. You can find more information about each provider in the [Inference Providers](/inference-providers/index#partners) section. @@ -377,7 +375,7 @@ This works well if you have simple functions, but if you have more complex funct -For example, let's say you assistant's only job is to give the weather for a given location. You may want to force the model to call the `get_current_weather` function, and not call any other functions. +For example, let's say your assistant's only job is to give the weather for a given location. You may want to force the model to call the `get_current_weather` function, and not call any other functions. ```python # Force a specific function call @@ -400,7 +398,7 @@ Here, we're forcing the model to call the `get_current_weather` function, and no -Hugging Face Hub does not support the `tool_choice` parameters that specify which function to call. +Currently, Hugging Face Hub does not support the `tool_choice` parameters that specify which function to call. @@ -430,7 +428,7 @@ tools = [ }, + "strict": True, # Enable strict mode }, - } + }, ] ``` @@ -475,7 +473,7 @@ Streaming is not supported by all providers. You can check the provider's docume -# Next Steps +## Next Steps Now that you've seen how to use function calling with Inference Providers, you can start building your own assistants! 
Why not try out some of these ideas: From a3b59ebe6af5e107b97d4e22109ff41757305971 Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Mon, 14 Jul 2025 12:33:29 +0200 Subject: [PATCH 7/7] clarify provider selection based on feedback --- .../inference-providers/guides/function-calling.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/inference-providers/guides/function-calling.md b/docs/inference-providers/guides/function-calling.md index 2bdb5bf37..eeb6e434a 100644 --- a/docs/inference-providers/guides/function-calling.md +++ b/docs/inference-providers/guides/function-calling.md @@ -29,7 +29,7 @@ from openai import OpenAI # Initialize client client = OpenAI( - base_url="https://router.huggingface.co/nebius/v1", + base_url="https://router.huggingface.co/v1", api_key=os.environ["HF_TOKEN"], ) ``` @@ -162,7 +162,7 @@ if response_message.tool_calls: # Get final response with function results final_response = client.chat.completions.create( - model="meta-llama/Llama-3.1-8B-Instruct", + model="deepseek-ai/DeepSeek-R1-0528", messages=messages, ) @@ -308,8 +308,9 @@ In the OpenAI client, you can specify the provider you want to use for the reque # The OpenAI client automatically routes through Inference Providers # You can specify provider preferences in your HF settings client = OpenAI( -- base_url="https://router.huggingface.co/together/v1", -+ base_url="https://router.huggingface.co/nebius/v1", ++ base_url="https://router.huggingface.co/v1", # automatically select provider based on hf.co/settings/inference-providers +- base_url="https://router.huggingface.co/together/v1", # manually select Together AI +- base_url="https://router.huggingface.co/nebius/v1", # manually select Nebius api_key=os.environ["HF_TOKEN"], ) @@ -325,8 +326,9 @@ In the Hugging Face Hub client, you can specify the provider you want to use for # Specify a provider directly client = InferenceClient( token=os.environ["HF_TOKEN"] -- provider="together" # Override client provider -+ provider="nebius" # Override client provider ++ provider="auto" # automatically select provider based on hf.co/settings/inference-providers +- provider="together" # manually select Together AI +- provider="nebius" # manually select Nebius ) ```