Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/examples/intrinsics/answer_relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from mellea.stdlib.intrinsics import rag


backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = ChatContext().add(Message("user", "Who attended the meeting?"))
documents = [
Document("Meeting attendees: Alice, Bob, Carol."),
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/intrinsics/answerability.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from mellea.stdlib.intrinsics import rag


backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = ChatContext().add(Message("assistant", "Hello there, how can I help you?"))
next_user_turn = "What is the square root of 4?"
documents_answerable = [Document("The square root of 4 is 2.")]
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/intrinsics/citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import json


backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = ChatContext().add(
Message(
"user",
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/intrinsics/context_relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from mellea.stdlib.intrinsics import rag


backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = ChatContext()
question = "Who is the CEO of Microsoft?"
document = Document(
Expand All @@ -28,4 +28,4 @@
)

result = rag.check_context_relevance(question, document, context, backend)
print(f"Result of context relevance check: {result}")
print(f"Result of context relevance check with irrelevant document: {result}")
2 changes: 1 addition & 1 deletion docs/examples/intrinsics/hallucination_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import json


backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = (
ChatContext()
.add(Message("assistant", "Hello there, how can I help you?"))
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/intrinsics/query_rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from mellea.stdlib.intrinsics import rag


backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = (
ChatContext()
.add(Message("assistant", "Welcome to pet questions!"))
Expand Down
7 changes: 4 additions & 3 deletions mellea/backends/adapters/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@ class IntriniscsCatalogEntry(pydantic.BaseModel):
)


_RAG_REPO = "ibm-granite/rag-intrinsics-lib"
_RAG_REPO = "ibm-granite/granite-lib-rag-r1.0"
_CORE_REPO = "ibm-granite/rag-intrinsics-lib"


_INTRINSICS_CATALOG_ENTRIES = [
############################################
# Core Intrinsics
############################################
IntriniscsCatalogEntry(name="requirement_check", repo_id=_RAG_REPO),
IntriniscsCatalogEntry(name="uncertainty", repo_id=_RAG_REPO),
IntriniscsCatalogEntry(name="requirement_check", repo_id=_CORE_REPO),
IntriniscsCatalogEntry(name="uncertainty", repo_id=_CORE_REPO),
############################################
# RAG Intrinsics
############################################
Expand Down
17 changes: 7 additions & 10 deletions test/stdlib_intrinsics/test_rag/test_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"""Location of data files for the tests in this file."""


BASE_MODEL = "ibm-granite/granite-3.3-2b-instruct"
BASE_MODEL = "ibm-granite/granite-4.0-micro"


@pytest.fixture(name="backend")
Expand Down Expand Up @@ -82,20 +82,19 @@ def test_answerability(backend):

# First call triggers adapter loading
result = rag.check_answerability(next_user_turn, documents, context, backend)
assert pytest.approx(result) == 1.0
assert pytest.approx(result, rel=0.01) == 1.0

# Second call hits a different code path from the first one
result = rag.check_answerability(next_user_turn, documents, context, backend)
assert pytest.approx(result) == 1.0
assert pytest.approx(result, rel=0.01) == 1.0


@pytest.mark.qualitative
def test_query_rewrite(backend):
"""Verify that the query rewrite intrinsic functions properly."""
context, next_user_turn, _ = _read_input_json("query_rewrite.json")
expected = (
"Is Rex, the dog, more likely to get fleas because he spends a lot of "
"time outdoors?"
"Is Rex more likely to get fleas because he spends a lot of time outdoors?"
)

# First call triggers adapter loading
Expand Down Expand Up @@ -132,11 +131,11 @@ def test_context_relevance(backend):

# First call triggers adapter loading
result = rag.check_context_relevance(question, document, context, backend)
assert pytest.approx(result, abs=2e-2) == 0.45
assert pytest.approx(result, abs=1e-2) == 0.0

# Second call hits a different code path from the first one
result = rag.check_context_relevance(question, document, context, backend)
assert pytest.approx(result, abs=2e-2) == 0.45
assert pytest.approx(result, abs=1e-2) == 0.0


@pytest.mark.qualitative
Expand Down Expand Up @@ -165,9 +164,7 @@ def test_answer_relevance(backend):
# Note that this is not the optimal answer. This test is currently using an
# outdated LoRA adapter. Releases of new adapters will come after the Mellea
# integration has stabilized.
expected_rewrite = (
"The documents do not provide information about the attendees of the meeting."
)
expected_rewrite = "The meeting attendees were Alice, Bob, and Carol."

# First call triggers adapter loading
result = rag.rewrite_answer_for_relevance(answer, docs, context, backend)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"index": 0,
"message": {
"content": "[{\"response_begin\": 0, \"response_end\": 36, \"response_text\": \"Purple bumble fish are yellow. \", \"faithfulness_likelihood\": 0.2062460112028628, \"explanation\": \"This sentence makes a factual claim about the color of fish. However, the provided document only mentions one type of fish that is yellow, which is the purple bumble fish. There is no information about green bumble fish in the document, so the claim about green bumble fish being yellow cannot be verified.\"}, {\"response_begin\": 36, \"response_end\": 70, \"response_text\": \"Green bumble fish are also yellow.\", \"faithfulness_likelihood\": 0.006380047389365753, \"explanation\": \"This sentence makes a factual claim about the color of fish. However, the provided document only mentions one type of fish that is yellow, which is the purple bumble fish. There is no information about green bumble fish in the document, so the claim about green bumble fish being yellow cannot be verified.\"}]",
"content": "[{\"response_begin\": 0, \"response_end\": 36, \"response_text\": \"Purple bumble fish are yellow. \", \"faithfulness_likelihood\": 0.7280598165124975, \"explanation\": \"This sentence makes a factual claim about the color of purple bumble fish. The document states 'The only type of fish that is yellow is the purple bumble fish.' This directly supports the claim in the sentence.\"}, {\"response_begin\": 36, \"response_end\": 70, \"response_text\": \"Green bumble fish are also yellow.\", \"faithfulness_likelihood\": 0.08656033472953338, \"explanation\": \"This sentence makes a factual claim about the color of green bumble fish. However, the document does not mention green bumble fish at all. Therefore, this claim cannot be verified from the provided context.\"}]",
"role": "assistant"
}
}
Expand Down