86 changes: 86 additions & 0 deletions docs/docs/integrations/chat/huggingface.mdx
@@ -0,0 +1,86 @@
> **Collaborator:** Could you add these to the new langchain docs?
> https://docs.langchain.com/oss/python/contributing/integrations-langchain

---
title: Hugging Face (chat)
sidebar_label: Hugging Face
---

## Overview

This page shows how to use Hugging Face models as chat models in LangChain: today by wrapping a local `transformers` pipeline, and, once supported in your version, directly through `init_chat_model`.

## Setup

Install the required packages:

```bash
pip install langchain-huggingface transformers
```
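
Local pipelines also need a tensor backend; if you don't already have one installed, PyTorch is the usual choice:

```bash
pip install torch
```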

> For Hugging Face pipelines, prefer `max_new_tokens` over `max_tokens`. By default a local pipeline runs on CPU; pass `device_map="auto"` (requires the `accelerate` package) to place the model on a GPU when one is available.
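
For example, a minimal sketch of explicit device placement (assumes `accelerate` is installed):

```python
from transformers import pipeline

# device_map="auto" lets accelerate place the model on GPU(s) if present,
# falling back to CPU otherwise
pipe = pipeline(
    "text-generation",
    model="microsoft/Phi-3-mini-4k-instruct",
    device_map="auto",
    max_new_tokens=128,
)
```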

## Instantiation

### Option 1 (works today): `transformers` pipeline → `HuggingFacePipeline` → `ChatHuggingFace`

```python
from transformers import pipeline

from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# Create a text-generation pipeline (runs on CPU unless a device is specified)
pipe = pipeline(
    "text-generation",
    model="microsoft/Phi-3-mini-4k-instruct",
    do_sample=False,  # deterministic
    max_new_tokens=128,  # HF uses max_new_tokens (not max_tokens)
)

# ChatHuggingFace expects a LangChain LLM, not a raw transformers pipeline,
# so wrap the pipeline in HuggingFacePipeline first
llm = ChatHuggingFace(llm=HuggingFacePipeline(pipeline=pipe))
```
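
If you'd rather not construct the `transformers` pipeline yourself, `HuggingFacePipeline.from_model_id` builds it for you; this sketch is equivalent to the snippet above:

```python
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# LangChain downloads the model and builds the pipeline internally
hf_llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    pipeline_kwargs={"do_sample": False, "max_new_tokens": 128},
)
llm = ChatHuggingFace(llm=hf_llm)
```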

### Option 2 (coming after fix): `init_chat_model(..., model_provider="huggingface")`

Once available in your version, you can initialize via `init_chat_model`:

```python
from langchain.chat_models import init_chat_model

llm = init_chat_model(
    model="microsoft/Phi-3-mini-4k-instruct",
    model_provider="huggingface",
    task="text-generation",
    do_sample=False,
    max_new_tokens=128,
)
```

> If your version doesn’t support this yet, use **Option 1** above.
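
Until then, a version-tolerant pattern is to try `init_chat_model` and fall back to the explicit wrapping. A minimal sketch (the exact exception your version raises may differ):

```python
from langchain.chat_models import init_chat_model

try:
    llm = init_chat_model(
        model="microsoft/Phi-3-mini-4k-instruct",
        model_provider="huggingface",
        task="text-generation",
    )
except (TypeError, ValueError):
    # Fall back to Option 1: wrap a local pipeline explicitly
    from transformers import pipeline

    from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

    pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct")
    llm = ChatHuggingFace(llm=HuggingFacePipeline(pipeline=pipe))
```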

## Invocation

```python
msg = llm.invoke("Say hi in one sentence.")
print(msg.content)
```
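
`invoke` also accepts a list of messages, which is how a system prompt is supplied directly:

```python
from langchain_core.messages import HumanMessage, SystemMessage

msg = llm.invoke([
    SystemMessage(content="You are a concise assistant."),
    HumanMessage(content="Say hi in one sentence."),
])
print(msg.content)
```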

## Chaining

```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are helpful."),
    ("human", "{question}"),
])

chain = prompt | llm
result = chain.invoke({"question": "What is the capital of France?"})
print(result.content)
```
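
To get a plain string back instead of an `AIMessage`, append `StrOutputParser` to the chain:

```python
from langchain_core.output_parsers import StrOutputParser

chain = prompt | llm | StrOutputParser()
print(chain.invoke({"question": "What is the capital of France?"}))
```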

## API reference

- `langchain_huggingface.ChatHuggingFace`
- `langchain_huggingface.HuggingFacePipeline`
- `transformers.pipeline` (Hugging Face)
- `langchain.chat_models.init_chat_model` (when available for Hugging Face)


138 changes: 138 additions & 0 deletions libs/langchain/tests/unit_tests/chat_models/test_init_chat_model_hf.py
@@ -0,0 +1,138 @@
import sys
import types
from importlib import util as import_util
from types import SimpleNamespace
from typing import Any, Optional

> **Collaborator:** We want to handle this via integration tests rather than in chat models. Could you remove the unit test?

import pytest

from langchain.chat_models import init_chat_model


@pytest.fixture
def hf_fakes(monkeypatch: pytest.MonkeyPatch) -> SimpleNamespace:
    """Install fakes for Hugging Face and transformers.

    Capture call arguments and simulate module presence to test initialization
    behavior, including current failure modes.
    """
    pipeline_calls: list[tuple[str, dict[str, Any]]] = []
    init_calls: list[dict[str, Any]] = []

    # Fake transformers.pipeline
    def fake_pipeline(task: str, **kwargs: Any) -> SimpleNamespace:
        pipeline_calls.append((task, dict(kwargs)))
        return SimpleNamespace(_kind="dummy_hf_pipeline")

    transformers_mod = types.ModuleType("transformers")
    setattr(transformers_mod, "pipeline", fake_pipeline)  # noqa: B010
    monkeypatch.setitem(sys.modules, "transformers", transformers_mod)

    # Fake langchain_huggingface.ChatHuggingFace that REQUIRES `llm`
    class FakeChatHuggingFace:
        def __init__(self, *, llm: object, **kwargs: Any) -> None:
            init_calls.append({"llm": llm, "kwargs": dict(kwargs)})
            self._llm = llm
            self._kwargs = kwargs

    # Build full package path:
    # langchain_huggingface.chat_models.huggingface
    hf_pkg = types.ModuleType("langchain_huggingface")
    hf_pkg.__path__ = []  # mark as package

    hf_chat_models_pkg = types.ModuleType("langchain_huggingface.chat_models")
    hf_chat_models_pkg.__path__ = []  # mark as package

    hf_chat_hf_mod = types.ModuleType(
        "langchain_huggingface.chat_models.huggingface",
    )
    setattr(hf_chat_hf_mod, "ChatHuggingFace", FakeChatHuggingFace)  # noqa: B010

    # Also expose at package root for top-level imports
    setattr(hf_pkg, "ChatHuggingFace", FakeChatHuggingFace)  # noqa: B010

    monkeypatch.setitem(sys.modules, "langchain_huggingface", hf_pkg)
    monkeypatch.setitem(
        sys.modules,
        "langchain_huggingface.chat_models",
        hf_chat_models_pkg,
    )
    monkeypatch.setitem(
        sys.modules,
        "langchain_huggingface.chat_models.huggingface",
        hf_chat_hf_mod,
    )

    # Ensure _check_pkg sees both packages as installed
    orig_find_spec = import_util.find_spec

    def fake_find_spec(name: str) -> Optional[object]:
        if name in {
            "transformers",
            "langchain_huggingface",
            "langchain_huggingface.chat_models",
            "langchain_huggingface.chat_models.huggingface",
        }:
            return object()
        return orig_find_spec(name)

    monkeypatch.setattr("importlib.util.find_spec", fake_find_spec)

    return SimpleNamespace(
        pipeline_calls=pipeline_calls,
        init_calls=init_calls,
    )


def test_hf_current_bug_basic_raises_typeerror(
    hf_fakes: SimpleNamespace,
) -> None:
    """Current behavior raises TypeError when using Hugging Face provider.

    init_chat_model constructs ChatHuggingFace without ``llm`` and never builds
    a pipeline. Verify that explicitly.
    """
    with pytest.raises(TypeError):
        _ = init_chat_model(
            "huggingface:microsoft/Phi-3-mini-4k-instruct",
            task="text-generation",
            temperature=0,
        )
    # Buggy path should not touch transformers.pipeline
    assert not hf_fakes.pipeline_calls, "pipeline should NOT be called"


def test_hf_current_bug_max_tokens_case_raises_typeerror(
    hf_fakes: SimpleNamespace,
) -> None:
    """Same failure when passing ``max_tokens``.

    Should raise and avoid constructing a pipeline.
    """
    with pytest.raises(TypeError):
        _ = init_chat_model(
            model="mistralai/Mistral-7B-Instruct-v0.2",
            model_provider="huggingface",
            task="text-generation",
            max_tokens=42,
        )
    assert not hf_fakes.pipeline_calls, "pipeline should NOT be called"


def test_hf_current_bug_timeout_retries_case_raises_typeerror(
    hf_fakes: SimpleNamespace,
) -> None:
    """Same failure when passing ``timeout``/``max_retries``.

    Should raise and avoid constructing a pipeline.
    """
    with pytest.raises(TypeError):
        _ = init_chat_model(
            model="microsoft/Phi-3-mini-4k-instruct",
            model_provider="huggingface",
            task="text-generation",
            temperature=0.1,
            timeout=7,
            max_retries=3,
        )
    assert not hf_fakes.pipeline_calls, "pipeline should NOT be called"