
Commit 545c1b3

feat: making Granite 4 the default model (#178)
* Granite 4 is now default
* Granite 4 notebook updates
1 parent 6126bd9 commit 545c1b3

24 files changed: +63, −39 lines
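
In short: a bare start_session() now resolves to Granite 4 Micro (3B) on Ollama instead of Granite 3.3 8B, and examples that depend on the old model pin it explicitly. A minimal sketch of both paths, using only the model-id names that appear in the diffs below (variable names are illustrative):

    from mellea import start_session
    from mellea.backends.model_ids import IBM_GRANITE_3_3_8B

    # After this commit, the bare default resolves to Granite 4 Micro (3B).
    m_default = start_session()

    # Code that relied on the old default should pin Granite 3.3 8B explicitly.
    m_granite33 = start_session(model_id=IBM_GRANITE_3_3_8B)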

docs/examples/best_of_n/prm.py

Lines changed: 6 additions & 1 deletion
@@ -2,6 +2,7 @@
 
 from docs.examples.helper import w
 from mellea import start_session
+from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
 from mellea.backends.process_reward_models.huggingface.prms import (
     HFGenerativePRM,
     HFRegressionPRM,
@@ -11,7 +12,11 @@
 from mellea.stdlib.sampling.best_of_n import BestofNSamplingStrategy
 
 # create a session for the generator using Granite 3.3 8B on Huggingface and a simple context [see below]
-m = start_session(backend_name="hf", model_options={ModelOption.MAX_NEW_TOKENS: 512})
+m = start_session(
+    backend_name="hf",
+    model_id=IBM_GRANITE_3_3_8B,
+    model_options={ModelOption.MAX_NEW_TOKENS: 512},
+)
 
 # initialize the PRM model
 prm_model = HFGenerativePRM(

docs/examples/generative_slots/generative_cot.py renamed to docs/examples/generative_slots/generative_gsm8k.py

Lines changed: 2 additions & 2 deletions
@@ -130,11 +130,11 @@ def extract_final_short_answer(
 
 if __name__ == "__main__":
     scores = []
+    m = start_session()
+
     for question, target in (
         x.values() for x in load_dataset("gsm8k", "main", split="train[:100]")
     ):
-        m = start_session()
-
         target = int(target.split("####")[-1])
         response = compute_chain_of_thought_and_final_answer(m, question=question)
         for step in response.step_by_step_solution:
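
Beyond the rename, this change is a hoisting fix: the session is now created once before the evaluation loop rather than once per question. A sketch of the resulting pattern (dataset name and split taken from the diff):

    from datasets import load_dataset
    from mellea import start_session

    m = start_session()  # one session/backend, created before the loop

    for question, target in (
        x.values() for x in load_dataset("gsm8k", "main", split="train[:100]")
    ):
        target = int(target.split("####")[-1])
        # ... generate with the shared session `m` and score against `target` ...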

docs/examples/generative_slots/generative_slots.py

Lines changed: 2 additions & 2 deletions
@@ -15,8 +15,8 @@ def generate_summary(text: str) -> str:
 
 
 if __name__ == "__main__":
-    with start_session():
-        sentiment_component = classify_sentiment(text="I love this!")
+    with start_session() as m:
+        sentiment_component = classify_sentiment(m, text="I love this!")
         print("Output sentiment is : ", sentiment_component)
 
         summary = generate_summary(
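
This change tracks an API shift visible throughout the commit: a generative slot no longer picks up an ambient session from a bare `with start_session():` block; the session is bound to a name and passed as the first call argument. A minimal sketch (the body of `classify_sentiment` is assumed; only its call site appears in the diff):

    from mellea import generative, start_session

    @generative
    def classify_sentiment(text: str) -> str:
        """Classify the sentiment of the text as 'positive' or 'negative'."""

    with start_session() as m:
        print(classify_sentiment(m, text="I love this!"))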

docs/examples/information_extraction/101_with_gen_slots.py

Lines changed: 4 additions & 2 deletions
@@ -3,12 +3,14 @@
 from mellea import generative, start_session
 from mellea.backends import model_ids
 
-m = start_session(model_id=model_ids.MISTRALAI_MISTRAL_0_3_7B)
+m = start_session()
 
 
 @generative
 def extract_all_person_names(doc: str) -> list[str]:
-    """Given a document, extract all person names. Return these names as list of strings."""
+    """
+    Given a document, extract names of ALL mentioned persons. Return these names as list of strings.
+    """
 
 
 # ref: https://www.nytimes.com/2012/05/20/world/world-leaders-at-us-meeting-urge-growth-not-austerity.html

docs/examples/information_extraction/advanced_with_m_instruct.py

Lines changed: 2 additions & 2 deletions
@@ -35,11 +35,11 @@ def _at_least_(t: str) -> bool:
 
 
 # start session
-m = start_session(model_id=model_ids.MISTRALAI_MISTRAL_0_3_7B)
+m = start_session()
 
 # run extraction using grounding context and sampling strategy
 sampled_p_names = m.instruct(
-    "Extract the person names from the document (doc1).",
+    "Extract ALL person names from the document (doc1).",
     grounding_context={"doc1": NYTimes_text},
     requirements=[check(None, validation_fn=simple_validate(at_least_n(2)))],
     strategy=RejectionSamplingStrategy(loop_budget=5),
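
The prompt tweak above ("ALL person names") leans on the validate-and-retry machinery the file already uses. A hedged sketch of that pattern — the import path for check and simple_validate is an assumption (the diff only shows call sites), and at_least_n is reconstructed from the _at_least_ predicate in the hunk header:

    from mellea import start_session
    from mellea.stdlib.requirement import check, simple_validate  # assumed import path
    from mellea.stdlib.sampling import RejectionSamplingStrategy

    def at_least_n(n: int):
        """Predicate factory: accept output with at least n comma-separated items (assumed logic)."""
        def _at_least_(t: str) -> bool:
            return len([s for s in t.split(",") if s.strip()]) >= n
        return _at_least_

    m = start_session()
    names = m.instruct(
        "Extract ALL person names from the document (doc1).",
        grounding_context={"doc1": "…document text…"},
        requirements=[check(None, validation_fn=simple_validate(at_least_n(2)))],
        strategy=RejectionSamplingStrategy(loop_budget=5),  # retry up to 5 times until the check passes
    )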

docs/examples/instruct_validate_repair/101_email.py

Lines changed: 3 additions & 2 deletions
@@ -17,10 +17,11 @@
 # # start_session() is equivalent to:
 # from mellea.backends import model_ids
 # from mellea.backends.ollama import OllamaModelBackend
-# from mellea import MelleaSession, SimpleContext
+# from mellea import MelleaSession
+# from mellea.stdlib.base import SimpleContext
 # m = MelleaSession(
 #     backend=OllamaModelBackend(
-#         model_id=model_ids.IBM_GRANITE_3_3_8B,
+#         model_id=model_ids.IBM_GRANITE_4_MICRO_3B,
 #         model_options={ModelOption.MAX_NEW_TOKENS: 200},
 #     ),
 #     ctx=SimpleContext()
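
Because the commented "equivalent to" block above spans two changes, here is the same snippet assembled and uncommented, with the corrected SimpleContext import and the new default model id (a sketch that mirrors the comments in the diff):

    from mellea import MelleaSession
    from mellea.backends import model_ids
    from mellea.backends.ollama import OllamaModelBackend
    from mellea.backends.types import ModelOption
    from mellea.stdlib.base import SimpleContext

    # Equivalent of start_session() after this commit, per the comments above.
    m = MelleaSession(
        backend=OllamaModelBackend(
            model_id=model_ids.IBM_GRANITE_4_MICRO_3B,
            model_options={ModelOption.MAX_NEW_TOKENS: 200},
        ),
        ctx=SimpleContext(),
    )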

docs/examples/instruct_validate_repair/101_email_comparison.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
     "Write a very funny email to invite all interns to the office party."
 )
 print(
-    f"***** email 1 ****\n{w(email_v1)}\n*******email 2 ******\n{w(email_v2)}\n*******"
+    f"***** email 1 ****\n{w(email_v1)}\n*******email 2 ******\n{w(email_v2)}\n\n*******"
 )
 
 # Use the emails as grounding context to evaluate which one is quirkier

docs/examples/instruct_validate_repair/101_email_with_requirements.py

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
 # write an email with automatic requirement checking.
 email_v1 = m.instruct(
     "Write an email to invite all interns to the office party.",
-    requirements=["be formal", "Use 'Dear interns' as greeting."],
+    requirements=["be formal", "Use 'Dear Interns' as greeting."],
 )
 
 # print result

docs/examples/instruct_validate_repair/101_email_with_validate.py

Lines changed: 4 additions & 3 deletions
@@ -1,14 +1,15 @@
 from docs.examples.helper import req_print, w
 from mellea import start_session
+from mellea.backends.model_ids import IBM_GRANITE_3_3_8B
 from mellea.backends.types import ModelOption
 from mellea.stdlib.sampling import RejectionSamplingStrategy
 
-# create a session using Granite 3.3 8B on Ollama and a simple context [see below]
+# create a session using Granite 4 Micro (3B) on Ollama and a simple context [see below]
 m = start_session(model_options={ModelOption.MAX_NEW_TOKENS: 200})
 
 email_v2_samples = m.instruct(
-    "Write an email to invite all interns to the office party.",
-    requirements=["be formal", "Use 'Dear interns' as greeting."],
+    "Write a very short email to invite all interns to the office party.",
+    requirements=["Use formal language.", "Use 'Dear Interns' as greeting."],
     strategy=RejectionSamplingStrategy(loop_budget=3),
     return_sampling_results=True,
 )

docs/examples/mini_researcher/researcher.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
 @cache
 def get_session():
     """Get M session (change model here)."""
-    return MelleaSession(backend=OllamaModelBackend(model_ids.IBM_GRANITE_3_3_8B))
+    return MelleaSession(backend=OllamaModelBackend(model_ids.IBM_GRANITE_4_MICRO_3B))
 
 
 @cache
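
mini_researcher keeps its @cache-wrapped session getter and only swaps the model id. The pattern is worth noting: functools.cache memoizes the zero-argument getter, so every caller shares one session instead of constructing a backend per call. A sketch (imports match the diff):

    from functools import cache
    from mellea import MelleaSession
    from mellea.backends import model_ids
    from mellea.backends.ollama import OllamaModelBackend

    @cache
    def get_session():
        """Get M session (change model here)."""
        return MelleaSession(backend=OllamaModelBackend(model_ids.IBM_GRANITE_4_MICRO_3B))

    m = get_session()           # backend constructed on first call only
    assert m is get_session()   # later calls return the same cached session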
