diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
index 0a71dbcace..6425d4c0f0 100644
--- a/keras_hub/api/models/__init__.py
+++ b/keras_hub/api/models/__init__.py
@@ -421,6 +421,16 @@ from keras_hub.src.models.phi3.phi3_tokenizer import (
     Phi3Tokenizer as Phi3Tokenizer,
 )
+from keras_hub.src.models.phi4.phi4_backbone import Phi4Backbone as Phi4Backbone
+from keras_hub.src.models.phi4.phi4_causal_lm import (
+    Phi4CausalLM as Phi4CausalLM,
+)
+from keras_hub.src.models.phi4.phi4_causal_lm_preprocessor import (
+    Phi4CausalLMPreprocessor as Phi4CausalLMPreprocessor,
+)
+from keras_hub.src.models.phi4.phi4_tokenizer import (
+    Phi4Tokenizer as Phi4Tokenizer,
+)
 from keras_hub.src.models.preprocessor import Preprocessor as Preprocessor
 from keras_hub.src.models.qwen.qwen_backbone import (
     QwenBackbone as Qwen2Backbone,
diff --git a/keras_hub/api/tokenizers/__init__.py b/keras_hub/api/tokenizers/__init__.py
index 082078184f..822840ee61 100644
--- a/keras_hub/api/tokenizers/__init__.py
+++ b/keras_hub/api/tokenizers/__init__.py
@@ -68,6 +68,9 @@ from keras_hub.src.models.phi3.phi3_tokenizer import (
     Phi3Tokenizer as Phi3Tokenizer,
 )
+from keras_hub.src.models.phi4.phi4_tokenizer import (
+    Phi4Tokenizer as Phi4Tokenizer,
+)
 from keras_hub.src.models.qwen.qwen_tokenizer import (
     QwenTokenizer as Qwen2Tokenizer,
 )
diff --git a/keras_hub/src/models/phi4/__init__.py b/keras_hub/src/models/phi4/__init__.py
new file mode 100644
index 0000000000..c03faf3c17
--- /dev/null
+++ b/keras_hub/src/models/phi4/__init__.py
@@ -0,0 +1 @@
+# TODO: Add a register_presets call once phi4_presets.py is implemented.
diff --git a/keras_hub/src/models/phi4/phi4_backbone.py b/keras_hub/src/models/phi4/phi4_backbone.py
new file mode 100644
index 0000000000..23328c3f78
--- /dev/null
+++ b/keras_hub/src/models/phi4/phi4_backbone.py
@@ -0,0 +1,61 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.phi3.phi3_backbone import Phi3Backbone
+
+
+@keras_hub_export("keras_hub.models.Phi4Backbone")
+class Phi4Backbone(Phi3Backbone):
+    """Phi-4 core network with hyperparameters.
+
+    This network implements a Transformer-based decoder network,
+    Phi-4, as described in ["Phi-4 Technical Report"](https://arxiv.org/pdf/2412.08905).
+    It includes the embedding lookups and transformer layers.
+
+    The default constructor gives a fully customizable, randomly initialized
+    Phi-4 model with any number of layers, heads, and embedding
+    dimensions. To load preset architectures and weights, use the `from_preset`
+    constructor.
+
+    Note that the defaults here are the Phi-3 defaults, because the Phi-4 model
+    follows the Phi-3-medium architecture but with different hyperparameters.
+    Use `keras_hub.models.Backbone.from_preset` to get the Phi-4 defaults.
+
+    Args:
+        vocabulary_size: int. The size of the token vocabulary.
+        num_layers: int. The number of transformer layers.
+        hidden_dim: int. The size of the embeddings and the hidden states of
+            the transformer layers.
+        intermediate_dim: int. The output dimension of the first Dense layer
+            in a three-layer feedforward network for each transformer.
+        num_query_heads: int. The number of query attention heads for each
+            transformer layer.
+        num_key_value_heads: int. The number of key and value attention heads
+            for each transformer layer.
+        layer_norm_epsilon: float, optional. Epsilon for the RMS layernorm
+            layers in the transformer decoder. Defaults to `1e-6`.
+        dropout: float, optional. Dropout probability for the Transformer
+            decoder.
+        max_sequence_length: int, optional. The maximum sequence length
+            that this model might ever be used with. Defaults to `4096`.
+        pretraining_sequence_length: int, optional. The maximum sequence
+            length that the model was pretrained with. Defaults to `4096`.
+        rope_max_wavelength: int, optional. The maximum angular wavelength of
+            the sine/cosine curves, for rotary embeddings. Defaults to
+            `10000`.
+        rope_scaling_type: str, optional. The type of the rope scaling. Can be
+            either `None` or `"su"`. `None` is for no rope scaling, and `"su"`
+            is for SuScaled rope, which should be used when
+            `max_sequence_length` is larger than
+            `pretraining_sequence_length`. Defaults to `None`.
+        rope_scaling_short_factor: list[float]. List of factors used to adjust
+            rope frequencies when the `rope_scaling_type` is `"su"`. List must
+            be of length `hidden_dim//num_query_heads//2`. It is used when the
+            input sequence length is smaller than
+            `pretraining_sequence_length`. Defaults to `None`.
+        rope_scaling_long_factor: list[float]. List of factors used to adjust
+            rope frequencies when the `rope_scaling_type` is `"su"`. List must
+            be of length `hidden_dim//num_query_heads//2`. It is used when the
+            input sequence length is larger than
+            `pretraining_sequence_length`. Defaults to `None`.
+        dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
+            for model computations and weights. Note that some computations,
+            such as softmax and layer normalization, will always be done at
+            float32 precision regardless of dtype.
+    """
diff --git a/keras_hub/src/models/phi4/phi4_backbone_test.py b/keras_hub/src/models/phi4/phi4_backbone_test.py
new file mode 100644
index 0000000000..adac55054e
--- /dev/null
+++ b/keras_hub/src/models/phi4/phi4_backbone_test.py
@@ -0,0 +1,92 @@
+import pytest
+from keras import ops
+
+from keras_hub.src.models.phi4.phi4_backbone import Phi4Backbone
+from keras_hub.src.tests.test_case import TestCase
+
+
+class Phi4Test(TestCase):
+    def setUp(self):
+        self.init_kwargs = {
+            "vocabulary_size": 10,
+            "num_layers": 2,
+            "num_query_heads": 4,
+            "num_key_value_heads": 2,
+            "hidden_dim": 8,
+            "intermediate_dim": 8,
+        }
+        self.su_rotary_init_kwargs = {
+            "vocabulary_size": 10,
+            "num_layers": 2,
+            "num_query_heads": 2,
+            "num_key_value_heads": 1,
+            "hidden_dim": 8,
+            "intermediate_dim": 12,
+            "max_sequence_length": 10,
+            "pretraining_sequence_length": 5,
+            "rope_scaling_type": "su",
+            "rope_scaling_short_factor": [1.2, 1.4],
+            "rope_scaling_long_factor": [0.8, 0.6],
+        }
+        self.input_data = {
+            "token_ids": ops.ones((2, 5), dtype="int32"),
+            "padding_mask": ops.ones((2, 5), dtype="int32"),
+        }
+
+    def test_backbone_basics(self):
+        self.run_backbone_test(
+            cls=Phi4Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output_shape=(2, 5, 8),
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=Phi4Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
+    def test_backbone_basics_with_su_rotary(self):
+        self.run_backbone_test(
+            cls=Phi4Backbone,
+            init_kwargs=self.su_rotary_init_kwargs,
+            input_data=self.input_data,
+            expected_output_shape=(2, 5, 8),
+        )
+
+    @pytest.mark.large
+    def test_saved_model_with_su_rotary(self):
+        self.run_model_saving_test(
+            cls=Phi4Backbone,
+            init_kwargs=self.su_rotary_init_kwargs,
+            input_data=self.input_data,
+        )
+
+    @pytest.mark.extra_large
+    def test_smallest_preset(self):
+        self.run_preset_test(
+            cls=Phi4Backbone,
preset="phi4_mini_4k_instruct_en", + input_data={ + "token_ids": ops.array([[1, 450, 4996, 1701, 29916, 29889]]), + "padding_mask": ops.ones((1, 6), dtype="int32"), + }, + expected_output_shape=(1, 6, 3072), + # The forward pass from a preset should be stable! + # Reference values computed using PyTorch HF model. + expected_partial_output=ops.array( + [-0.21222, 0.04004, -0.02759, 0.02200] + ), + ) + + @pytest.mark.extra_large + def test_all_presets(self): + for preset in Phi4Backbone.presets: + self.run_preset_test( + cls=Phi4Backbone, + preset=preset, + input_data=self.input_data, + ) diff --git a/keras_hub/src/models/phi4/phi4_causal_lm.py b/keras_hub/src/models/phi4/phi4_causal_lm.py new file mode 100644 index 0000000000..8f014d81e2 --- /dev/null +++ b/keras_hub/src/models/phi4/phi4_causal_lm.py @@ -0,0 +1,33 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.phi3.phi3_causal_lm import Phi3CausalLM +from keras_hub.src.models.phi4.phi4_backbone import Phi4Backbone +from keras_hub.src.models.phi4.phi4_causal_lm_preprocessor import ( + Phi4CausalLMPreprocessor, +) + + +@keras_hub_export("keras_hub.models.Phi4CausalLM") +class Phi4CausalLM(Phi3CausalLM): + """An end-to-end Phi4 model for causal language modeling. + + A causal language model (LM) predicts the next token based on previous + tokens. This task setup can be used to train the model unsupervised on + plain text input, or to autoregressively generate plain text similar to + the data used for training. This task can be used for pre-training or + fine-tuning a Phi-4 model, simply by calling `fit()`. + + This model has a `generate()` method, which generates text based on a + prompt. The generation strategy used is controlled by an additional + `sampler` argument on `compile()`. You can recompile the model with + different `keras_hub.samplers` objects to control the generation. By + default, `"top_k"` sampling will be used. + + Args: + backbone: A `keras_hub.models.Phi4Backbone` instance. + preprocessor: A `keras_hub.models.Phi4CausalLMPreprocessor` or `None`. + If `None`, this model will not apply preprocessing, and inputs + should be preprocessed before calling the model. + """ + + backbone_cls = Phi4Backbone + preprocessor_cls = Phi4CausalLMPreprocessor diff --git a/keras_hub/src/models/phi4/phi4_causal_lm_preprocessor.py b/keras_hub/src/models/phi4/phi4_causal_lm_preprocessor.py new file mode 100644 index 0000000000..63d874daa3 --- /dev/null +++ b/keras_hub/src/models/phi4/phi4_causal_lm_preprocessor.py @@ -0,0 +1,76 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor +from keras_hub.src.models.phi4.phi4_backbone import Phi4Backbone +from keras_hub.src.models.phi4.phi4_tokenizer import Phi4Tokenizer + + +@keras_hub_export("keras_hub.models.Phi4CausalLMPreprocessor") +class Phi4CausalLMPreprocessor(CausalLMPreprocessor): + """Phi4 Causal LM preprocessor. + + This preprocessing layer is meant for use with + `keras_hub.models.Phi4CausalLM`. By default, it will take in batches of + strings, and return outputs in a `(x, y, sample_weight)` format, where the + `y` label is the next token id in the `x` sequence. + + For use with generation, the layer also exposes two methods + `generate_preprocess()` and `generate_postprocess()`. When this preprocessor + is attached to a `keras_hub.models.Phi4CausalLM` instance, these methods + will be called implicitly in `generate()`. They can also be called + standalone (e.g. 
+    in a separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.Phi4Tokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the
+            tokenizer start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `True`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length` of
+            the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.Phi4CausalLMPreprocessor.from_preset(
+        "phi4_mini_4k_instruct_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a dataset to preprocess a single sentence.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
+    backbone_cls = Phi4Backbone
+    tokenizer_cls = Phi4Tokenizer
diff --git a/keras_hub/src/models/phi4/phi4_causal_lm_preprocessor_test.py b/keras_hub/src/models/phi4/phi4_causal_lm_preprocessor_test.py
new file mode 100644
index 0000000000..96f207e817
--- /dev/null
+++ b/keras_hub/src/models/phi4/phi4_causal_lm_preprocessor_test.py
@@ -0,0 +1,92 @@
+import pytest
+
+from keras_hub.src.models.phi4.phi4_causal_lm_preprocessor import (
+    Phi4CausalLMPreprocessor,
+)
+from keras_hub.src.models.phi4.phi4_tokenizer import Phi4Tokenizer
+from keras_hub.src.tests.test_case import TestCase
+
+
+class Phi4CausalLMPreprocessorTest(TestCase):
+    def setUp(self):
+        self.vocab = ["!", "air", "Ġair", "plane", "Ġat", "port"]
+        self.vocab += [
+            "<|endoftext|>",
+            "<|endofprompt|>",
+            "<|dummy_87|>",
+            "<|fim_prefix|>",
+            "<|fim_middle|>",
+            "<|fim_suffix|>",
+        ]
+        self.vocab += ["<|im_start|>", "<|im_sep|>", "<|im_end|>"]
+        self.vocab = dict([(token, i) for i, token in enumerate(self.vocab)])
+        self.merges = ["Ġ a", "Ġ t", "Ġ i", "Ġ b", "a i", "p l", "n e"]
+        self.merges += ["Ġa t", "p o", "r t", "Ġt h", "ai r", "pl a", "po rt"]
+        self.merges += ["Ġai r", "Ġa i", "pla ne"]
+        self.tokenizer = Phi4Tokenizer(
+            vocabulary=self.vocab, merges=self.merges
+        )
+        self.init_kwargs = {
+            "tokenizer": self.tokenizer,
+            "sequence_length": 10,
+        }
+        # [1, 3, 4, 2, 5]
+        self.input_data = (["airplane at airport"],)
+
+    def test_preprocessor_basics(self):
+        self.run_preprocessor_test(
+            cls=Phi4CausalLMPreprocessor,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output=(
+                {
+                    "token_ids": [[6, 1, 3, 4, 2, 5, 7, 0, 0, 0]],
+                    "padding_mask": [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0]],
+                },
+                [[1, 3, 4, 2, 5, 7, 0, 0, 0, 0]],  # Labels shifted.
+                [[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]],  # Zero out padded labels.
+            ),
+        )
+
+    def test_no_start_end_token(self):
+        input_data = ["airplane at airport"] * 4
+
+        preprocessor = Phi4CausalLMPreprocessor(
+            **self.init_kwargs,
+            add_start_token=False,
+            add_end_token=False,
+        )
+        x, y, sw = preprocessor(input_data)
+        self.assertAllEqual(
+            x["token_ids"], [[1, 3, 4, 2, 5, 0, 0, 0, 0, 0]] * 4
+        )
+        self.assertAllEqual(
+            x["padding_mask"], [[1, 1, 1, 1, 1, 0, 0, 0, 0, 0]] * 4
+        )
+        self.assertAllEqual(y, [[3, 4, 2, 5, 0, 0, 0, 0, 0, 0]] * 4)
+        self.assertAllEqual(sw, [[1, 1, 1, 1, 0, 0, 0, 0, 0, 0]] * 4)
+
+    def test_generate_preprocess(self):
+        input_data = "airplane at airport"
+        preprocessor = Phi4CausalLMPreprocessor(**self.init_kwargs)
+        x = preprocessor.generate_preprocess(input_data)
+        self.assertAllEqual(x["token_ids"], [6, 1, 3, 4, 2, 5, 0, 0, 0, 0])
+        self.assertAllEqual(x["padding_mask"], [1, 1, 1, 1, 1, 1, 0, 0, 0, 0])
+
+    def test_generate_postprocess(self):
+        input_data = {
+            "token_ids": [1, 3, 4, 2, 5, 3, 9, 7, 11, 0],
+            "padding_mask": [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+        }
+        preprocessor = Phi4CausalLMPreprocessor(**self.init_kwargs)
+        x = preprocessor.generate_postprocess(input_data)
+        self.assertAllEqual(x, "airplane at airport")
+
+    @pytest.mark.extra_large
+    def test_all_presets(self):
+        for preset in Phi4CausalLMPreprocessor.presets:
+            self.run_preset_test(
+                cls=Phi4CausalLMPreprocessor,
+                preset=preset,
+                input_data=self.input_data,
+            )
diff --git a/keras_hub/src/models/phi4/phi4_causal_lm_test.py b/keras_hub/src/models/phi4/phi4_causal_lm_test.py
new file mode 100644
index 0000000000..28a7c2e797
--- /dev/null
+++ b/keras_hub/src/models/phi4/phi4_causal_lm_test.py
@@ -0,0 +1,128 @@
+from unittest.mock import patch
+
+import pytest
+from keras import ops
+
+from keras_hub.src.models.phi4.phi4_backbone import Phi4Backbone
+from keras_hub.src.models.phi4.phi4_causal_lm import Phi4CausalLM
+from keras_hub.src.models.phi4.phi4_causal_lm_preprocessor import (
+    Phi4CausalLMPreprocessor,
+)
+from keras_hub.src.models.phi4.phi4_tokenizer import Phi4Tokenizer
+from keras_hub.src.tests.test_case import TestCase
+
+
+class Phi4CausalLMTest(TestCase):
+    def setUp(self):
+        # Move <|dummy_87|> to index 0 since the tokenizer sets pad_token_id
+        # to 0.
+        self.vocab = ["<|dummy_87|>", "air", "Ġair", "plane", "Ġat", "port"]
+        self.vocab += [
+            "<|endoftext|>",
+            "<|endofprompt|>",
+            "!",
+            "<|fim_prefix|>",
+            "<|fim_middle|>",
+            "<|fim_suffix|>",
+            # Necessary since `Phi3CausalLM` requires this in `generate()`
+            "<|end|>",
+        ]
+        self.vocab += ["<|im_start|>", "<|im_sep|>", "<|im_end|>"]
+        self.vocab = dict([(token, i) for i, token in enumerate(self.vocab)])
+        self.merges = ["Ġ a", "Ġ t", "Ġ i", "Ġ b", "a i", "p l", "n e"]
+        self.merges += ["Ġa t", "p o", "r t", "Ġt h", "ai r", "pl a", "po rt"]
+        self.merges += ["Ġai r", "Ġa i", "pla ne"]
+        self.preprocessor = Phi4CausalLMPreprocessor(
+            Phi4Tokenizer(vocabulary=self.vocab, merges=self.merges),
+            sequence_length=15,
+        )
+        self.vocab_size = self.preprocessor.tokenizer.vocabulary_size()
+        self.backbone = Phi4Backbone(
+            vocabulary_size=self.vocab_size,
+            num_layers=2,
+            num_query_heads=4,
+            num_key_value_heads=2,
+            hidden_dim=8,
+            intermediate_dim=16,
+        )
+        self.init_kwargs = {
+            "preprocessor": self.preprocessor,
+            "backbone": self.backbone,
+        }
+        self.train_data = ([" airplane at airport", " airplane at airport"],)
+        self.input_data = self.preprocessor(*self.train_data)[0]
+
+    def test_causal_lm_basics(self):
+        self.run_task_test(
+            cls=Phi4CausalLM,
+            init_kwargs=self.init_kwargs,
+            train_data=self.train_data,
+            expected_output_shape=(2, 15, self.vocab_size),
+        )
+
+    def test_generate(self):
+        causal_lm = Phi4CausalLM(**self.init_kwargs)
+        # String input.
+ prompt = " airplane at airport" + output = causal_lm.generate(" airplane at airport") + self.assertTrue(prompt in output) + # Int tensor input. + prompt_ids = self.preprocessor.generate_preprocess([prompt]) + causal_lm.preprocessor = None + outputs = causal_lm.generate(prompt_ids, stop_token_ids=None) + # Assert prompt is in output in token id space. + self.assertAllEqual( + outputs["token_ids"][:, :5], + prompt_ids["token_ids"][:, :5], + ) + self.assertAllEqual( + outputs["padding_mask"][:, :5], + prompt_ids["padding_mask"][:, :5], + ) + + def test_early_stopping(self): + causal_lm = Phi4CausalLM(**self.init_kwargs) + call_with_cache = causal_lm.call_with_cache + + def wrapper(*args, **kwargs): + """Modify output logits to always favor end_token_id""" + logits, hidden_states, cache = call_with_cache(*args, **kwargs) + index = self.preprocessor.tokenizer.end_token_id + update = ops.ones_like(logits)[:, :, index] * 1.0e9 + update = ops.expand_dims(update, axis=-1) + logits = ops.slice_update(logits, (0, 0, index), update) + return logits, hidden_states, cache + + with patch.object(causal_lm, "call_with_cache", wraps=wrapper): + prompt = [" airplane at airport", " airplane"] + output = causal_lm.generate(prompt, max_length=7) + # We should immediately abort and output the prompt. + self.assertEqual(prompt, output) + + def test_generate_compilation(self): + causal_lm = Phi4CausalLM(**self.init_kwargs) + # Assert we do not recompile with successive calls. + causal_lm.generate("the fox") + first_fn = causal_lm.generate_function + causal_lm.generate("the fox") + second_fn = causal_lm.generate_function + self.assertEqual(first_fn, second_fn) + # Assert we do recompile after compile is called. + causal_lm.compile(sampler="greedy") + self.assertIsNone(causal_lm.generate_function) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=Phi4CausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + + @pytest.mark.extra_large + def test_all_presets(self): + for preset in Phi4CausalLM.presets: + self.run_preset_test( + cls=Phi4CausalLM, + preset=preset, + input_data=self.input_data, + ) diff --git a/keras_hub/src/models/phi4/phi4_tokenizer.py b/keras_hub/src/models/phi4/phi4_tokenizer.py new file mode 100644 index 0000000000..de0ef3c3ef --- /dev/null +++ b/keras_hub/src/models/phi4/phi4_tokenizer.py @@ -0,0 +1,86 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.phi4.phi4_backbone import Phi4Backbone +from keras_hub.src.tokenizers.byte_pair_tokenizer import BytePairTokenizer + + +@keras_hub_export( + [ + "keras_hub.tokenizers.Phi4Tokenizer", + "keras_hub.models.Phi4Tokenizer", + ] +) +class Phi4Tokenizer(BytePairTokenizer): + """Phi4 tokenizer using Byte-Pair Encoding subword segmentation. + + This tokenizer class will tokenize raw strings into integer sequences and + is based on `keras_hub.tokenizers.BytePairTokenizer`. Unlike the + underlying tokenizer, it will check for all special tokens needed by + Phi4 models and provides a `from_preset()` method to automatically + download a matching vocabulary for a Phi4 preset. + + If input is a batch of strings (rank > 0), the layer will output a + `tf.RaggedTensor` where the last dimension of the output is ragged. + + If input is a scalar string (rank == 0), the layer will output a dense + `tf.Tensor` with static shape `[None]`. + + Args: + vocabulary: string or dict, maps token to integer ids. If it is a + string, it should be the file path to a json file. 
+        merges: string or list, contains the merge rule. If it is a string,
+            it should be the file path to merge rules. The merge rule file
+            should have one merge rule per line. Every merge rule contains
+            merge entities separated by a space.
+        sequence_length: int. If set, the output will be padded or truncated
+            to the `sequence_length`. Defaults to `100_352` based on the
+            [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905).
+
+    Examples:
+    ```python
+    # Unbatched input.
+    tokenizer = keras_hub.models.Phi4Tokenizer.from_preset(
+        "phi4_mini_4k_instruct_en",
+    )
+    tokenizer("The quick brown fox jumped.")
+
+    # Batched input.
+    tokenizer(["The quick brown fox jumped.", "The fox slept."])
+
+    # Detokenization.
+    tokenizer.detokenize(tokenizer("The quick brown fox jumped."))
+    ```
+
+    # References
+
+    - [Phi-4 tokenizer config](https://huggingface.co/microsoft/phi-4/raw/main/tokenizer.json)
+    """
+
+    backbone_cls = Phi4Backbone
+
+    def __init__(
+        self,
+        vocabulary=None,
+        merges=None,
+        sequence_length=100_352,
+        **kwargs,
+    ):
+        self._add_special_token("<|endoftext|>", "start_token")
+        self._add_special_token("<|endofprompt|>", "end_token")
+        self._add_special_token("<|dummy_87|>", "pad_token")
+
+        # FIM = Fill-in-the-middle, which uses special tokens to identify
+        # the prefix/middle/suffix part of the input/output for coding tasks.
+        self._add_special_token("<|fim_prefix|>", "fim_prefix")
+        self._add_special_token("<|fim_middle|>", "fim_middle")
+        self._add_special_token("<|fim_suffix|>", "fim_suffix")
+
+        self._add_special_token("<|im_start|>", "input_message_start")
+        self._add_special_token("<|im_sep|>", "input_message_separator")
+        self._add_special_token("<|im_end|>", "input_message_end")
+
+        super().__init__(
+            vocabulary=vocabulary,
+            merges=merges,
+            sequence_length=sequence_length,
+            **kwargs,
+        )
diff --git a/keras_hub/src/models/phi4/phi4_tokenizer_test.py b/keras_hub/src/models/phi4/phi4_tokenizer_test.py
new file mode 100644
index 0000000000..e5bafa416c
--- /dev/null
+++ b/keras_hub/src/models/phi4/phi4_tokenizer_test.py
@@ -0,0 +1,61 @@
+import pytest
+
+from keras_hub.src.models.phi4.phi4_tokenizer import Phi4Tokenizer
+from keras_hub.src.tests.test_case import TestCase
+
+
+class Phi4TokenizerTest(TestCase):
+    def setUp(self):
+        self.vocab = ["!", "air", "Ġair", "plane", "Ġat", "port"]
+        self.vocab += [
+            "<|endoftext|>",
+            "<|endofprompt|>",
+            "<|dummy_87|>",
+            "<|fim_prefix|>",
+            "<|fim_middle|>",
+            "<|fim_suffix|>",
+        ]
+        self.vocab += ["<|im_start|>", "<|im_sep|>", "<|im_end|>"]
+        self.vocab = dict([(token, i) for i, token in enumerate(self.vocab)])
+        self.merges = ["Ġ a", "Ġ t", "Ġ i", "Ġ b", "a i", "p l", "n e"]
+        self.merges += ["Ġa t", "p o", "r t", "Ġt h", "ai r", "pl a", "po rt"]
+        self.merges += ["Ġai r", "Ġa i", "pla ne"]
+        self.init_kwargs = {
+            "vocabulary": self.vocab,
+            "merges": self.merges,
+            "sequence_length": None,
+        }
+        self.input_data = [
+            "<|endoftext|> airplane at airport<|endofprompt|><|dummy_87|>",
+            " airplane airport",
+        ]
+
+    def test_tokenizer_basics(self):
+        self.run_preprocessing_layer_test(
+            cls=Phi4Tokenizer,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output=[[6, 2, 3, 4, 2, 5, 7, 8], [2, 3, 2, 5]],
+        )
+
+    def test_errors_missing_special_tokens(self):
+        with self.assertRaises(ValueError):
+            Phi4Tokenizer(vocabulary={"foo": 0, "bar": 1}, merges=["fo o"])
+
+    @pytest.mark.large
+    def test_smallest_preset(self):
+        self.run_preset_test(
+            cls=Phi4Tokenizer,
+            preset="phi4_mini_4k_instruct_en",
+            input_data=["The quick brown fox."],
+            expected_output=[[791, 4062, 14198, 39935, 13]],
+        )
+
+    @pytest.mark.extra_large
+    def test_all_presets(self):
+        for preset in Phi4Tokenizer.presets:
+            self.run_preset_test(
+                cls=Phi4Tokenizer,
+                preset=preset,
+                input_data=self.input_data,
+            )
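
---

For reviewers who want to exercise the new classes end to end without preset weights, here is a minimal sketch that wires the four classes together. It reuses the toy vocabulary and merges from the causal LM test fixtures above; the special-token strings mirror those fixtures and are illustrative, not a published preset.

```python
import keras_hub

# Toy BPE assets mirroring the causal LM test fixtures above. The pad token
# sits at index 0, and `<|end|>` is required by the inherited
# `Phi3CausalLM.generate()`.
vocab = ["<|dummy_87|>", "air", "Ġair", "plane", "Ġat", "port"]
vocab += ["<|endoftext|>", "<|endofprompt|>", "!"]
vocab += ["<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|end|>"]
vocab += ["<|im_start|>", "<|im_sep|>", "<|im_end|>"]
vocab = {token: i for i, token in enumerate(vocab)}
merges = ["Ġ a", "Ġ t", "Ġ i", "Ġ b", "a i", "p l", "n e"]
merges += ["Ġa t", "p o", "r t", "Ġt h", "ai r", "pl a", "po rt"]
merges += ["Ġai r", "Ġa i", "pla ne"]

tokenizer = keras_hub.tokenizers.Phi4Tokenizer(
    vocabulary=vocab, merges=merges
)
preprocessor = keras_hub.models.Phi4CausalLMPreprocessor(
    tokenizer, sequence_length=10
)
backbone = keras_hub.models.Phi4Backbone(
    vocabulary_size=tokenizer.vocabulary_size(),
    num_layers=2,
    num_query_heads=4,
    num_key_value_heads=2,
    hidden_dim=8,
    intermediate_dim=16,
)
causal_lm = keras_hub.models.Phi4CausalLM(
    backbone=backbone, preprocessor=preprocessor
)
# Untrained weights, so the continuation is noise; this only exercises the
# tokenize -> pack -> decode -> detokenize round trip.
causal_lm.generate(" airplane at airport", max_length=10)
```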
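The FIM comment in `Phi4Tokenizer.__init__` can be made concrete with a short sketch. It continues from the `tokenizer` built above and assumes that `_add_special_token(token, name)` exposes each token string as a `tokenizer.<name>` attribute (the same mechanism behind `tokenizer.end_token_id` used in the tests); whether fill-in-the-middle prompting is actually useful depends on the checkpoint, not on this tokenizer.

```python
# Assemble a fill-in-the-middle style prompt from the registered special
# tokens: prefix text, then suffix text, then ask for the middle.
fim_prompt = (
    tokenizer.fim_prefix    # "<|fim_prefix|>"
    + " airplane at"
    + tokenizer.fim_suffix  # "<|fim_suffix|>"
    + " airport"
    + tokenizer.fim_middle  # "<|fim_middle|>"
)
token_ids = tokenizer(fim_prompt)
# With the toy vocabulary above: [9, 2, 3, 4, 11, 2, 5, 10]
print(token_ids)
```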
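One non-obvious constraint from the backbone docstring is the length of the rope scaling factor lists: with `rope_scaling_type="su"`, both lists must contain `hidden_dim // num_query_heads // 2` entries. The sketch below uses the same toy dimensions as `su_rotary_init_kwargs` in `phi4_backbone_test.py`.

```python
from keras import ops

import keras_hub

# head_dim = hidden_dim // num_query_heads = 8 // 2 = 4, so each factor list
# needs head_dim // 2 = 2 entries.
backbone = keras_hub.models.Phi4Backbone(
    vocabulary_size=10,
    num_layers=2,
    num_query_heads=2,
    num_key_value_heads=1,
    hidden_dim=8,
    intermediate_dim=12,
    max_sequence_length=10,
    pretraining_sequence_length=5,
    rope_scaling_type="su",
    rope_scaling_short_factor=[1.2, 1.4],
    rope_scaling_long_factor=[0.8, 0.6],
)
hidden_states = backbone(
    {
        "token_ids": ops.ones((1, 5), dtype="int32"),
        "padding_mask": ops.ones((1, 5), dtype="int32"),
    }
)
print(hidden_states.shape)  # (1, 5, 8)
```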