8 changes: 7 additions & 1 deletion docs/source/en/package_reference/inference_types.md
@@ -57,12 +57,18 @@ This part of the lib is still under development and will be improved in future releases.

 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName

-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
+[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema

 [[autodoc]] huggingface_hub.ChatCompletionInputMessage

 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk

+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText
+
 [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions

 [[autodoc]] huggingface_hub.ChatCompletionInputTool
8 changes: 7 additions & 1 deletion docs/source/ko/package_reference/inference_types.md
@@ -56,12 +56,18 @@ rendered properly in your Markdown viewer.

 [[autodoc]] huggingface_hub.ChatCompletionInputFunctionName

-[[autodoc]] huggingface_hub.ChatCompletionInputGrammarType
+[[autodoc]] huggingface_hub.ChatCompletionInputJSONSchema

 [[autodoc]] huggingface_hub.ChatCompletionInputMessage

 [[autodoc]] huggingface_hub.ChatCompletionInputMessageChunk

+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONObject
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatJSONSchema
+
+[[autodoc]] huggingface_hub.ChatCompletionInputResponseFormatText
+
 [[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions

 [[autodoc]] huggingface_hub.ChatCompletionInputTool
15 changes: 12 additions & 3 deletions src/huggingface_hub/__init__.py
@@ -301,10 +301,13 @@
         "ChatCompletionInputFunctionDefinition",
         "ChatCompletionInputFunctionName",
         "ChatCompletionInputGrammarType",
-        "ChatCompletionInputGrammarTypeType",
+        "ChatCompletionInputJSONSchema",
         "ChatCompletionInputMessage",
         "ChatCompletionInputMessageChunk",
         "ChatCompletionInputMessageChunkType",
+        "ChatCompletionInputResponseFormatJSONObject",
+        "ChatCompletionInputResponseFormatJSONSchema",
+        "ChatCompletionInputResponseFormatText",
         "ChatCompletionInputStreamOptions",
         "ChatCompletionInputTool",
         "ChatCompletionInputToolCall",
@@ -545,10 +548,13 @@
     "ChatCompletionInputFunctionDefinition",
     "ChatCompletionInputFunctionName",
     "ChatCompletionInputGrammarType",
-    "ChatCompletionInputGrammarTypeType",
+    "ChatCompletionInputJSONSchema",
     "ChatCompletionInputMessage",
     "ChatCompletionInputMessageChunk",
     "ChatCompletionInputMessageChunkType",
+    "ChatCompletionInputResponseFormatJSONObject",
+    "ChatCompletionInputResponseFormatJSONSchema",
+    "ChatCompletionInputResponseFormatText",
     "ChatCompletionInputStreamOptions",
     "ChatCompletionInputTool",
     "ChatCompletionInputToolCall",
@@ -1267,10 +1273,13 @@ def __dir__():
         ChatCompletionInputFunctionDefinition,  # noqa: F401
         ChatCompletionInputFunctionName,  # noqa: F401
         ChatCompletionInputGrammarType,  # noqa: F401
-        ChatCompletionInputGrammarTypeType,  # noqa: F401
+        ChatCompletionInputJSONSchema,  # noqa: F401
         ChatCompletionInputMessage,  # noqa: F401
         ChatCompletionInputMessageChunk,  # noqa: F401
         ChatCompletionInputMessageChunkType,  # noqa: F401
+        ChatCompletionInputResponseFormatJSONObject,  # noqa: F401
+        ChatCompletionInputResponseFormatJSONSchema,  # noqa: F401
+        ChatCompletionInputResponseFormatText,  # noqa: F401
         ChatCompletionInputStreamOptions,  # noqa: F401
         ChatCompletionInputTool,  # noqa: F401
         ChatCompletionInputToolCall,  # noqa: F401
5 changes: 4 additions & 1 deletion src/huggingface_hub/inference/_generated/types/__init__.py
@@ -24,10 +24,13 @@
     ChatCompletionInputFunctionDefinition,
     ChatCompletionInputFunctionName,
     ChatCompletionInputGrammarType,
-    ChatCompletionInputGrammarTypeType,
+    ChatCompletionInputJSONSchema,
     ChatCompletionInputMessage,
     ChatCompletionInputMessageChunk,
     ChatCompletionInputMessageChunkType,
+    ChatCompletionInputResponseFormatJSONObject,
+    ChatCompletionInputResponseFormatJSONSchema,
+    ChatCompletionInputResponseFormatText,
     ChatCompletionInputStreamOptions,
     ChatCompletionInputTool,
     ChatCompletionInputToolCall,
52 changes: 43 additions & 9 deletions src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -3,7 +3,7 @@
 # See:
 # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union

 from .base import BaseInferenceType, dataclass_with_extra

@@ -45,17 +45,51 @@ class ChatCompletionInputMessage(BaseInferenceType):
     tool_calls: Optional[List[ChatCompletionInputToolCall]] = None


-ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"]
+@dataclass_with_extra
+class ChatCompletionInputJSONSchema(BaseInferenceType):
+    name: str
+    """
+    The name of the response format.
+    """
+    description: Optional[str] = None
+    """
+    A description of what the response format is for, used by the model to determine
+    how to respond in the format.
+    """
+    schema: Optional[Dict[str, object]] = None
+    """
+    The schema for the response format, described as a JSON Schema object. Learn how
+    to build JSON schemas [here](https://json-schema.org/).
+    """
+    strict: Optional[bool] = None
+    """
+    Whether to enable strict schema adherence when generating the output. If set to
+    true, the model will always follow the exact schema defined in the `schema`
+    field.
+    """


 @dataclass_with_extra
-class ChatCompletionInputGrammarType(BaseInferenceType):
-    type: "ChatCompletionInputGrammarTypeType"
-    value: Any
-    """A string that represents a [JSON Schema](https://json-schema.org/).
-    JSON Schema is a declarative language that allows to annotate JSON documents
-    with types and descriptions.
-    """
+class ChatCompletionInputResponseFormatText(BaseInferenceType):
+    type: Literal["text"]
+
+
+@dataclass_with_extra
+class ChatCompletionInputResponseFormatJSONSchema(BaseInferenceType):
+    type: Literal["json_schema"]
+    json_schema: ChatCompletionInputJSONSchema
+
+
+@dataclass_with_extra
+class ChatCompletionInputResponseFormatJSONObject(BaseInferenceType):
+    type: Literal["json_object"]
+
+
+ChatCompletionInputGrammarType = Union[
+    ChatCompletionInputResponseFormatText,
+    ChatCompletionInputResponseFormatJSONSchema,
+    ChatCompletionInputResponseFormatJSONObject,
+]


 @dataclass_with_extra
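Taken together, these generated types replace the single ChatCompletionInputGrammarType dataclass with an OpenAI-style discriminated union, while keeping the old public name alive as a Union alias. A minimal usage sketch (the model ID and the schema are illustrative placeholders, not part of this diff):

# Sketch: requesting schema-constrained output through the new response_format union.
from huggingface_hub import InferenceClient

client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct")  # illustrative model
response = client.chat_completion(
    messages=[{"role": "user", "content": "Name one planet of the solar system."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "planet",  # `name` is the only required field of ChatCompletionInputJSONSchema
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
                "required": ["name"],
            },
            "strict": True,
        },
    },
)
print(response.choices[0].message.content)  # e.g. '{"name": "Mercury"}'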
2 changes: 1 addition & 1 deletion src/huggingface_hub/inference/_providers/cerebras.py
@@ -1,4 +1,4 @@
-from huggingface_hub.inference._providers._common import BaseConversationalTask
+from ._common import BaseConversationalTask


 class CerebrasConversationalTask(BaseConversationalTask):
24 changes: 21 additions & 3 deletions src/huggingface_hub/inference/_providers/cohere.py
@@ -1,6 +1,8 @@
-from huggingface_hub.inference._providers._common import (
-    BaseConversationalTask,
-)
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
+from ._common import BaseConversationalTask


 _PROVIDER = "cohere"
@@ -13,3 +15,19 @@ def __init__(self):

     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/compatibility/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+                # Only remove response_format from parameters if we've handled it
+                parameters.pop("response_format", None)
+        return payload
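The override above rewrites an OpenAI-style json_schema response format into the json_object-plus-schema shape that Cohere's compatibility endpoint accepts. A self-contained sketch of just that rewrite (the helper name and sample values are illustrative):

# Mirrors the json_schema -> json_object translation performed by the override above.
def to_cohere_response_format(response_format: dict) -> dict:
    if response_format.get("type") == "json_schema":
        details = response_format.get("json_schema")
        if isinstance(details, dict) and "schema" in details:
            return {"type": "json_object", "schema": details["schema"]}
    return response_format

print(to_cohere_response_format(
    {"type": "json_schema", "json_schema": {"name": "demo", "schema": {"type": "object"}}}
))
# {'type': 'json_object', 'schema': {'type': 'object'}}

Fireworks AI and Together (below) apply the same translation.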
19 changes: 19 additions & 0 deletions src/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -1,3 +1,7 @@
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
 from ._common import BaseConversationalTask


@@ -7,3 +11,18 @@ def __init__(self):

     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/inference/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+                parameters.pop("response_format", None)
+        return payload
10 changes: 9 additions & 1 deletion src/huggingface_hub/inference/_providers/hf_inference.py
@@ -96,13 +96,21 @@ def __init__(self):
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
+        payload = filter_none(parameters)
         mapped_model = provider_mapping_info.provider_id
         payload_model = parameters.get("model") or mapped_model

         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"

-        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            payload["response_format"] = {
+                "type": "json_object",
+                "value": response_format["json_schema"]["schema"],
+            }
+            parameters.pop("response_format", None)
+        return {**payload, "model": payload_model, "messages": inputs}

     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         base_url = (
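The HF Inference rewrite differs from the Cohere-style one above in one detail: the schema lands under a value key rather than schema, presumably matching the grammar format of the underlying text-generation backend. An illustrative sketch (the helper name and sample values are not from this diff):

def to_hf_inference_response_format(response_format: dict) -> dict:
    # json_schema -> {"type": "json_object", "value": <schema>}
    if response_format.get("type") == "json_schema":
        return {"type": "json_object", "value": response_format["json_schema"]["schema"]}
    return response_format

print(to_hf_inference_response_format(
    {"type": "json_schema", "json_schema": {"name": "demo", "schema": {"type": "object"}}}
))
# {'type': 'json_object', 'value': {'type': 'object'}}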
13 changes: 13 additions & 0 deletions src/huggingface_hub/inference/_providers/nebius.py
@@ -30,6 +30,19 @@ class NebiusConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+                # Only remove response_format from parameters if we've handled it
+                parameters.pop("response_format", None)
+        return payload
+

 class NebiusTextToImageTask(TaskProviderHelper):
     def __init__(self):
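Nebius takes yet another shape: the schema is lifted into a top-level guided_json field of the payload, a guided-decoding convention used by vLLM-style servers, instead of being wrapped in a response_format object. A sketch of the resulting payload (all values illustrative):

payload = {"model": "demo-model", "messages": [{"role": "user", "content": "Name one planet."}]}
response_format = {"type": "json_schema", "json_schema": {"name": "demo", "schema": {"type": "object"}}}
if response_format.get("type") == "json_schema":
    payload["guided_json"] = response_format["json_schema"]["schema"]
print(payload["guided_json"])
# {'type': 'object'}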
14 changes: 14 additions & 0 deletions src/huggingface_hub/inference/_providers/sambanova.py
@@ -9,6 +9,20 @@ class SambanovaConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="sambanova", base_url="https://api.sambanova.ai")

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        response_format_config = parameters.get("response_format")
+        if isinstance(response_format_config, dict):
+            if response_format_config.get("type") == "json_schema":
+                json_schema_config = response_format_config.get("json_schema", {})
+                strict = json_schema_config.get("strict") if isinstance(json_schema_config, dict) else False
+                if strict is True or strict is None:
+                    json_schema_config["strict"] = False
+
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        return payload
+

 class SambanovaFeatureExtractionTask(TaskProviderHelper):
     def __init__(self):
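SambaNova is the odd one out: it forwards the json_schema response format unchanged and only forces strict to False before the payload is built, presumably because the provider rejects strict schema adherence. The mutation happens before super()._prepare_payload_as_dict() serializes the parameters, so the downgraded flag is what reaches the wire. A sketch of the flag handling (sample values illustrative):

response_format = {
    "type": "json_schema",
    "json_schema": {"name": "demo", "schema": {"type": "object"}, "strict": True},
}
json_schema_config = response_format["json_schema"]
strict = json_schema_config.get("strict")
if strict is True or strict is None:  # True and unset are both forced to False
    json_schema_config["strict"] = False
print(json_schema_config["strict"])
# False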
16 changes: 16 additions & 0 deletions src/huggingface_hub/inference/_providers/together.py
@@ -51,6 +51,22 @@ class TogetherConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider=_PROVIDER, base_url=_BASE_URL)

+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+                # Only remove response_format from parameters if we've handled it
+                parameters.pop("response_format", None)
+        return payload
+

 class TogetherTextToImageTask(TogetherTask):
     def __init__(self):