diff --git a/.ci/cspell_dict.txt b/.ci/cspell_dict.txt
index ce79be5034d..44977d86464 100644
--- a/.ci/cspell_dict.txt
+++ b/.ci/cspell_dict.txt
@@ -505,4 +505,4 @@ yolov
 yscale
 yujie
 yury
-zfnet
\ No newline at end of file
+zfnet
diff --git a/examples/llm_compression/openvino/smollm2_360m_codebook/README.md b/examples/llm_compression/openvino/smollm2_360m_codebook/README.md
new file mode 100644
index 00000000000..c82045d6261
--- /dev/null
+++ b/examples/llm_compression/openvino/smollm2_360m_codebook/README.md
@@ -0,0 +1,26 @@
+# Large Language Models Codebook Compression Example
+
+This example demonstrates how to apply codebook compression to the [HuggingFaceTB/SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct) model. It can be useful for evaluation and early HW enablement purposes.
+
+## Prerequisites
+
+To use this example:
+
+- Create a separate Python* environment and activate it: `python3 -m venv nncf_env && source nncf_env/bin/activate`
+- Install dependencies:
+
+```bash
+pip install -U pip
+pip install -r requirements.txt
+pip install ../../../../
+```
+
+## Run Example
+
+To run the example:
+
+```bash
+python main.py
+```
+
+It will automatically download the baseline model, compress it, and save the resulting compressed model.
diff --git a/examples/llm_compression/openvino/smollm2_360m_codebook/main.py b/examples/llm_compression/openvino/smollm2_360m_codebook/main.py
new file mode 100644
index 00000000000..a5b27104218
--- /dev/null
+++ b/examples/llm_compression/openvino/smollm2_360m_codebook/main.py
@@ -0,0 +1,163 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+
+import numpy as np
+from optimum.intel.openvino import OVModelForCausalLM
+from torch.jit import TracerWarning
+from transformers import AutoTokenizer
+from transformers import logging
+
+import nncf
+
+logging.set_verbosity_error()
+warnings.filterwarnings("ignore", category=TracerWarning)
+
+
+MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"
+COMPRESSED_MODEL_ID = "smollm2_360m_compressed_codebook"
+
+
+def generate_answers(
+    questions: list[str], model: OVModelForCausalLM, tokenizer: AutoTokenizer, max_new_tokens: int = 50
+) -> dict[str, str]:
+    """
+    Generate answers for a list of questions using the provided model and tokenizer.
+
+    :param questions: List of questions to be answered.
+    :param model: The model to use for generating answers.
+    :param tokenizer: The tokenizer to use for processing the input and output.
+    :param max_new_tokens: Maximum number of new tokens to generate for each answer. Defaults to 50.
+    :return: A dictionary mapping each question to its corresponding answer.
+    """
+    messages = [
+        {"role": "system", "content": "You are a chatbot who always responds as short as possible."},
+        {"role": "user", "content": "What is the capital of Spain?"},
+        {"role": "assistant", "content": "Madrid."},
+    ]
+    answers_by_questions = {}
+
+    for question in questions:
+        messages.append({"role": "user", "content": question})
+        input_ids = tokenizer.apply_chat_template(
+            messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
+        ).to(device=model.device)
+        input_len = len(input_ids[0])
+
+        output = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=False)[0]
+        answer = tokenizer.decode(output[input_len:], skip_special_tokens=True)
+        answers_by_questions[question] = answer
+        messages.append({"role": "assistant", "content": answer})
+
+    return answers_by_questions
+
+
+def print_answers(header: str, answers_by_questions: dict[str, str]) -> None:
+    """
+    Print the answers to the console.
+
+    :param header: Header to print before the answers.
+    :param answers_by_questions: Dictionary mapping questions to their answers.
+    """
+    print(header)
+    for question, answer in answers_by_questions.items():
+        print(f"Q: {question}\nA: {answer}\n")
+
+
+QUESTIONS = [
+    "What is the capital of France?",
+    "What is the highest peak in the Alps?",
+    "What is the largest city in Canada?",
+    "What is the most visited city in Japan?",
+]
+
+
+def load_model_and_tokenizer(model_id: str, export: bool = True) -> tuple[OVModelForCausalLM, AutoTokenizer]:
+    """
+    Load the model and tokenizer from the specified model ID.
+
+    :param model_id: The identifier of the model to load.
+    :param export: Whether to export the model for OpenVINO. Defaults to True.
+    :return: A tuple containing the loaded model and tokenizer.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+    model = OVModelForCausalLM.from_pretrained(
+        model_id,
+        export=export,
+        load_in_8bit=False,
+    )
+    return model, tokenizer
+
+
+def default_codebook_example(model_id: str, compressed_model_id: str) -> list[str]:
+    """
+    Example of using the default codebook compression.
+
+    :param model_id: The identifier of the model to load.
+    :param compressed_model_id: The identifier for the compressed model to save.
+    :return: A list of answers generated by the model after compression.
+    """
+    model, tokenizer = load_model_and_tokenizer(model_id)
+    answers_by_questions = generate_answers(QUESTIONS, model, tokenizer)
+    print_answers("Non-optimized model outputs:\n", answers_by_questions)
+
+    model.model = nncf.compress_weights(model.model, mode=nncf.CompressWeightsMode.CB4_F8E4M3, ratio=1.0, group_size=64)
+    model.save_pretrained(compressed_model_id)
+    tokenizer.save_pretrained(compressed_model_id)
+
+    model, tokenizer = load_model_and_tokenizer(compressed_model_id, False)
+    answers_by_questions = generate_answers(QUESTIONS, model, tokenizer)
+    print_answers("Optimized model outputs:\n", answers_by_questions)
+
+    return list(answers_by_questions.values())
+
+
+def custom_codebook_example(model_id: str, compressed_model_id: str) -> list[str]:
+    """
+    Example of using a custom codebook for compression.
+
+    :param model_id: The identifier of the model to load.
+    :param compressed_model_id: The identifier for the compressed model to save.
+    :return: A list of answers generated by the model after compression.
+ """ + model, tokenizer = load_model_and_tokenizer(model_id) + + answers_by_questions = generate_answers(QUESTIONS, model, tokenizer) + print_answers("Non-optimized model outputs:\n", answers_by_questions) + + codebook = np.array([-8, -4, -2, -1, 0, 1, 2, 4, 8], dtype=np.int8) + + model.model = nncf.compress_weights( + model.model, + mode=nncf.CompressWeightsMode.CODEBOOK, + ratio=1.0, + group_size=-1, + advanced_parameters=nncf.AdvancedCompressionParameters(codebook=codebook), + ) + model.save_pretrained(compressed_model_id) + tokenizer.save_pretrained(compressed_model_id) + + model, tokenizer = load_model_and_tokenizer(compressed_model_id, False) + answers_by_questions = generate_answers(QUESTIONS, model, tokenizer) + print_answers("Optimized model outputs:\n", answers_by_questions) + + return list(answers_by_questions.values()) + + +def main(): + res = default_codebook_example(MODEL_ID, COMPRESSED_MODEL_ID) + res += custom_codebook_example(MODEL_ID, COMPRESSED_MODEL_ID + "_custom") + return res + + +if __name__ == "__main__": + main() diff --git a/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt b/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt new file mode 100644 index 00000000000..feab3bfd695 --- /dev/null +++ b/examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt @@ -0,0 +1,4 @@ +openvino==2025.1 +optimum-intel[openvino]>=1.22.0 +transformers>=4.48.0 +onnx==1.17.0 diff --git a/src/nncf/openvino/graph/metatypes/openvino_metatypes.py b/src/nncf/openvino/graph/metatypes/openvino_metatypes.py index c7726276e00..214bce563f1 100644 --- a/src/nncf/openvino/graph/metatypes/openvino_metatypes.py +++ b/src/nncf/openvino/graph/metatypes/openvino_metatypes.py @@ -817,7 +817,13 @@ def _is_embedding(node: ov.Node) -> bool: allowed_types_list = ["f16", "f32", "f64"] const_port_id = 0 input_tensor = node.input_value(const_port_id) - if input_tensor.get_element_type().get_type_name() in allowed_types_list: + input_type = input_tensor.get_element_type().get_type_name() + + # TODO(aanuf): Implement a pattern based check for embedding. + if node.friendly_name.endswith("nncf_codebook"): + return False + + if input_type in allowed_types_list: const_node = get_operation_const_op(node, const_port_id) if const_node is not None: return True diff --git a/src/nncf/openvino/graph/node_utils.py b/src/nncf/openvino/graph/node_utils.py index 32ed821b7d1..5faec5e904e 100644 --- a/src/nncf/openvino/graph/node_utils.py +++ b/src/nncf/openvino/graph/node_utils.py @@ -44,6 +44,7 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype from nncf.tensor import Tensor from nncf.tensor import TensorBackend +from nncf.tensor import TensorDataType InplaceInsertionFnType = Callable[[ov.Node, int, str], ov.Node] @@ -685,3 +686,27 @@ def create_ov_const_from_tensor(x: Tensor, dtype: ov.Type, name: Optional[str] = return opset.constant(x.data, name=name, shared_memory=True) const = opset.constant(x.data, dtype=dtype, name=name) return const + + +def create_ov_codebook_subgraph( + codebook: Tensor, indexes: Tensor, dtype: ov.Type, name: Optional[str] = None +) -> op.Constant: + """ + Create an OpenVINO subgraph with gather from the given codebook and indexes tensors. + + :param codebook: Codebook tensor. + :param indexes: Indexes tensor. + :param dtype: Data type of the indexes. + :param name: Optional name of the constant. + :return: OpenVINO subgraph. 
+ """ + codebook_const = opset.constant(codebook.data, name=name) + if codebook.dtype != TensorDataType.float16: + codebook_const = opset.convert(codebook_const, destination_type=ov.Type.f16) + + codebook_indexes = opset.constant(indexes.data, dtype=dtype, name=name + "_nncf_codebook_idxs") + if dtype == ov.Type.u4: + codebook_indexes = opset.convert(codebook_indexes, destination_type=ov.Type.u8) + + const = opset.gather(codebook_const, codebook_indexes, 0, name=name + "_nncf_codebook") + return const diff --git a/src/nncf/openvino/optimized_functions/functions.py b/src/nncf/openvino/optimized_functions/functions.py index 2a11e4c3608..e22ea481abd 100644 --- a/src/nncf/openvino/optimized_functions/functions.py +++ b/src/nncf/openvino/optimized_functions/functions.py @@ -105,7 +105,7 @@ def do_float_quantization( config: WeightCompressionConfig, reduction_axes: Optional[ReductionAxes] = None, precomputed_scale: Optional[Tensor] = None, -) -> tuple[Tensor, Tensor]: +) -> tuple[Tensor, Tensor, Tensor]: """ Computes quantization scale if not provided, and performs corresponding nf4 weight quantization. For NF4 quantization quantizes the weights to 16 levels on [-1, 1] interval. @@ -151,7 +151,7 @@ def do_float_quantization( compressed_weight = model([weight, precomputed_scale])[0] scale = precomputed_scale - return compressed_weight, scale + return compressed_weight, scale, None def integer_quantize_dequantize_weight( diff --git a/src/nncf/parameters.py b/src/nncf/parameters.py index 0c8753f5530..e1269ea78e1 100644 --- a/src/nncf/parameters.py +++ b/src/nncf/parameters.py @@ -85,6 +85,8 @@ class CompressWeightsMode(StrEnum): :param NF4: The the same as INT4_SYM mode, but primary precision is NF4 data type without zero point. :param INT8: Mode is deprecated and will be removed in future releases. Please use `INT8_ASYM` instead. :param E2M1: FP4 format from "OCP Microscaling Formats (MX) Specification" Version 1.0. + :param CODEBOOK: Codebook (LUT) quantization format. + :param CB4_F8E4M3: Codebook (LUT) format with 16 fixed fp8 values in E4M3 format. """ INT8_SYM = "int8_sym" @@ -92,8 +94,10 @@ class CompressWeightsMode(StrEnum): INT4_SYM = "int4_sym" INT4_ASYM = "int4_asym" NF4 = "nf4" + CB4_F8E4M3 = "cb4_f8e4m3" INT8 = "int8" # Deprecated mode E2M1 = "e2m1" + CODEBOOK = "codebook" @api(canonical_alias="nncf.CompressionFormat") diff --git a/src/nncf/quantization/advanced_parameters.py b/src/nncf/quantization/advanced_parameters.py index 10f18b34eae..4de0152188f 100644 --- a/src/nncf/quantization/advanced_parameters.py +++ b/src/nncf/quantization/advanced_parameters.py @@ -29,6 +29,8 @@ from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.quantization.range_estimator import StatisticsType +TTensor = Any + @api(canonical_alias="nncf.OverflowFix") class OverflowFix(StrEnum): @@ -379,6 +381,9 @@ class AdvancedCompressionParameters: :type lora_adapter_rank: int :param backend_params: Backend-specific parameters. :type backend_params: dict[str, Any] + :param codebook: The codebook (LUT) for the weight compression. + Applicable for vector quantization. Must be a numpy array or ov Tensor. 
+    :type codebook: TTensor
     """

     statistics_path: Optional[str] = None
@@ -390,6 +395,7 @@
     lora_correction_params: AdvancedLoraCorrectionParameters = field(default_factory=AdvancedLoraCorrectionParameters)
     lora_adapter_rank: int = 256
     backend_params: dict[str, Any] = field(default_factory=dict)
+    codebook: Optional[TTensor] = None


 @api()
diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py
index 8f75dbc4013..7ab4d2d1813 100644
--- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -39,6 +39,7 @@
 from nncf.quantization.algorithms.algorithm import Algorithm
 from nncf.quantization.algorithms.weight_compression.awq import AWQ
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
+from nncf.quantization.algorithms.weight_compression.constants import CB4_QUANTILES
 from nncf.quantization.algorithms.weight_compression.gptq import GPTQ
 from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm
 from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA
@@ -46,6 +47,8 @@
 from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig
 from nncf.scopes import IgnoredScope
 from nncf.scopes import get_ignored_node_names_from_ignored_scope
+from nncf.tensor import Tensor
+from nncf.tensor import functions as fns
 from nncf.tensor.definitions import TensorDataType

 TModel = TypeVar("TModel")
@@ -179,6 +182,24 @@ def check_user_compression_configuration(
         ]
     )
     ranks = [advanced_parameters.lora_adapter_rank, advanced_parameters.lora_correction_params.adapter_rank]
+
+    codebook = advanced_parameters.codebook
+    if codebook is not None:
+        # OpenVINO Tensor does not support the functions needed to validate the codebook, so convert it to numpy
+        np_codebook = Tensor(codebook).as_numpy_tensor()
+        msg = None
+        if np_codebook.ndim != 1:
+            msg = "The codebook must be a 1D array, but a multi-dimensional array is given."
+        elif np_codebook.size < 2:
+            msg = (
+                "The codebook must contain at least two unique elements, "
+                "but a single-element or empty array is given."
+            )
+        elif fns.any(np_codebook[:-1] >= np_codebook[1:]):
+            msg = "The codebook must be a sorted 1D array with unique elements, but an unsorted array is given."
+        if msg:
+            raise nncf.ValidationError(msg)
+
     for size in values_to_check:
         if size <= 0:
             msg = f"The subset_size value should be positive, but subset_size={size} is given."
@@ -207,6 +228,10 @@
         msg = "LoRA Correction algorithm is not compatible with FQ, FQ_LORA and FQ_LORA_NLS compression formats."
         raise nncf.ValidationError(msg)

+    if mode == CompressWeightsMode.CODEBOOK and (advanced_parameters is None or advanced_parameters.codebook is None):
+        msg = "Codebook compression mode requires codebook parameters to be specified in advanced_parameters."
+ raise nncf.ValidationError(msg) + class WeightCompression(Algorithm): """ @@ -293,7 +318,7 @@ def __init__( advanced_parameters if advanced_parameters is not None else AdvancedCompressionParameters() ) - primary_config = WeightCompressionConfig(mode=self._mode, group_size=self._group_size) + primary_config = self._get_primary_config() criterion_cls = MIXED_PRECISION_CRITERIA.get(self._sensitivity_metric) self._mixed_precision_algo = criterion_cls(primary_config, self._ratio, self._subset_size) self._statistics_path = self._advanced_parameters.statistics_path @@ -429,6 +454,20 @@ def _get_ratio_defining_params( return ratio_defining_params + def _get_primary_config(self): + codebook_values = None + + if self._mode == CompressWeightsMode.CB4_F8E4M3: + codebook_values = Tensor(CB4_QUANTILES) + elif self._mode == CompressWeightsMode.CODEBOOK: + codebook_values = Tensor(self._advanced_parameters.codebook) + + return WeightCompressionConfig( + mode=self._mode, + group_size=self._group_size, + codebook_values=codebook_values, + ) + def _set_weight_compression_config( self, ratio_defining_params: list[WeightCompressionParameters], @@ -445,7 +484,7 @@ def _set_weight_compression_config( :param graph: The model graph associated with the model. :param statistics_points: Statistics points. """ - primary_config = WeightCompressionConfig(mode=self._mode, group_size=self._group_size) + primary_config = self._get_primary_config() if self._ratio == 1: for weight_param in ratio_defining_params: weight_param.compression_config = primary_config @@ -653,13 +692,13 @@ def apply( # del is used to prematurely mark non-necessary data as free for garbage collection del self.awq_algo - scales = {} - zero_points = {} + precomputed_compressed_weights = None lora_correction_algo = None description = "Applying Weight Compression" + if self._gptq: del statistics - model, scales, zero_points = self._gptq_algo.apply( + model, precomputed_compressed_weights = self._gptq_algo.apply( model=model, graph=graph, dataset=dataset, @@ -668,7 +707,7 @@ def apply( ) else: if self._scale_estimation: - scales, zero_points = self._scale_estimation_algo.apply( + precomputed_compressed_weights = self._scale_estimation_algo.apply( model=model, graph=graph, all_weight_params=all_weight_params, @@ -691,8 +730,7 @@ def apply( model, graph, track(all_weight_params, description=description, weights=all_weight_sizes), - scales, - zero_points, + precomputed_compressed_weights, lora_correction_algo, self._compression_format, self._advanced_parameters, diff --git a/src/nncf/quantization/algorithms/weight_compression/awq.py b/src/nncf/quantization/algorithms/weight_compression/awq.py index fbab09a1fdf..fa423828fc1 100644 --- a/src/nncf/quantization/algorithms/weight_compression/awq.py +++ b/src/nncf/quantization/algorithms/weight_compression/awq.py @@ -25,7 +25,6 @@ from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.experimental.common.tensor_statistics.statistics import WCTensorStatistic -from nncf.parameters import CompressWeightsMode from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend @@ -250,7 +249,7 @@ def _data_aware_step(self, wp, weight, statistics): for _ in range(self._steps): cur_scale = gscale**alpha weights_to_fake_quantize = gweight * cur_scale - if config.mode == 
CompressWeightsMode.NF4: + if not config.is_integer: g_decompressed_weighs = float_quantize_dequantize_weight( weights_to_fake_quantize, awq_config, reduction_axis ) diff --git a/src/nncf/quantization/algorithms/weight_compression/backend.py b/src/nncf/quantization/algorithms/weight_compression/backend.py index 62d0745a0f4..e2257168ad3 100644 --- a/src/nncf/quantization/algorithms/weight_compression/backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/backend.py @@ -28,6 +28,7 @@ from nncf.quantization.advanced_parameters import AdvancedCompressionParameters from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.tensor import Tensor from nncf.tensor import TensorDataType @@ -148,8 +149,7 @@ def transform_model( model: TModel, graph: NNCFGraph, weight_compression_parameters: Iterable[WeightCompressionParameters], - precomputed_scales: dict[str, Tensor] = None, - precomputed_zero_points: dict[str, Tensor] = None, + precomputed_compressed_weights: Optional[dict[str, CompressedWeight]] = None, lora_correction_algo: Optional[LoraCorrectionAlgorithm] = None, compression_format: CompressionFormat = CompressionFormat.DQ, advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(), @@ -160,8 +160,7 @@ def transform_model( :param model: Model in which the weights will be compressed according to the weight compression description. :param graph: The graph associated with the model. :param weight_compression_parameters: An iterable of weight compression parameters. - :param precomputed_scales: Precomputed scales for weight compression. - :param precomputed_zero_points: Precomputed zero points for weight compression. + :param precomputed_compressed_weights: Precomputed scales, zero points, or codebook for weight compression. :param lora_correction_algo: An optional algorithm to reduce quantization noise after weight compression by using low-rank adapters. This algorithm not only overrides weights with their quantized counterparts but also expands the model's execution graph following the Low-Rank Adaptation (LoRA) concept. diff --git a/src/nncf/quantization/algorithms/weight_compression/config.py b/src/nncf/quantization/algorithms/weight_compression/config.py index 63ed892c472..1d5376b3454 100644 --- a/src/nncf/quantization/algorithms/weight_compression/config.py +++ b/src/nncf/quantization/algorithms/weight_compression/config.py @@ -18,6 +18,7 @@ from nncf.parameters import CompressWeightsMode TWeightType = TypeVar("TWeightType") +TTensor = TypeVar("TTensor") @dataclass @@ -28,10 +29,14 @@ class WeightCompressionConfig: :param mode: Defines a mode for weight compression. Defaults to INT8_ASYM mode. :param group_size: Number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. Defaults to -1. + :param codebook_values: Optional codebook values for CODEBOOK compression mode. + Must be fns.Tensor which wraps numpy array or ov tensor. Storing ov tensor is useful for having + destination data type information available. 
""" mode: Optional[CompressWeightsMode] = CompressWeightsMode.INT8_ASYM group_size: Optional[int] = -1 + codebook_values: Optional[TTensor] = None @property def num_bits(self): @@ -49,7 +54,22 @@ def is_integer(self): """ :return: True if compression type in integer, else False. """ - return self.mode not in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1] + return self.mode not in [ + CompressWeightsMode.NF4, + CompressWeightsMode.E2M1, + CompressWeightsMode.CODEBOOK, + CompressWeightsMode.CB4_F8E4M3, + ] + + @property + def is_codebook(self): + """ + :return: True if compression type is codebook, else False. + """ + return self.mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3] + + def get_numpy_codebook(self): + return self.codebook_values.as_numpy_tensor() def __hash__(self): return hash((self.mode.value, self.group_size)) diff --git a/src/nncf/quantization/algorithms/weight_compression/constants.py b/src/nncf/quantization/algorithms/weight_compression/constants.py new file mode 100644 index 00000000000..6119fd8f83c --- /dev/null +++ b/src/nncf/quantization/algorithms/weight_compression/constants.py @@ -0,0 +1,79 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +NF4_QUANTILES = np.array( + [ + -1.0, + -0.6961928009986877, + -0.5250730514526367, + -0.39491748809814453, + -0.28444138169288635, + -0.18477343022823334, + -0.09105003625154495, + 0.0, + 0.07958029955625534, + 0.16093020141124725, + 0.24611230194568634, + 0.33791524171829224, + 0.44070982933044434, + 0.5626170039176941, + 0.7229568362236023, + 1.0, + ], + dtype=np.float32, +) + + +CB4_QUANTILES = np.array( + [ + -3.5, + -2.5, + -1.875, + -1.375, + -1.0, + -0.625, + -0.3125, + 0.0, + 0.28125, + 0.5625, + 0.875, + 1.125, + 1.5, + 2.0, + 2.5, + 3.5, + ], + dtype=np.float32, +) + + +CENTER_OF_NF4_QUANTILES = np.array( + [ + -0.84809643, + -0.6106329, + -0.45999527, + -0.33967942, + -0.2346074, + -0.13791174, + -0.045525018, + 0.03979015, + 0.120255254, + 0.20352125, + 0.29201376, + 0.38931254, + 0.5016634, + 0.6427869, + 0.8614784, + ], + dtype=np.float32, +) diff --git a/src/nncf/quantization/algorithms/weight_compression/gptq.py b/src/nncf/quantization/algorithms/weight_compression/gptq.py index 1de6f549851..814ec4a2a6b 100644 --- a/src/nncf/quantization/algorithms/weight_compression/gptq.py +++ b/src/nncf/quantization/algorithms/weight_compression/gptq.py @@ -25,6 +25,7 @@ from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.algorithms.weight_compression.scale_estimation import ScaleEstimation from nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_float_quantization_params from 
nncf.quantization.algorithms.weight_compression.weight_lowering import calculate_integer_quantization_params @@ -83,7 +84,7 @@ def apply( weight_compression_parameters: list[WeightCompressionParameters], statistic_points: Optional[StatisticPointsContainer] = None, backend_entity: Optional[WeightCompressionAlgoBackend] = None, - ) -> tuple[TModel, dict[str, Tensor], dict[str, Tensor]]: + ) -> tuple[TModel, dict[str, CompressedWeight]]: """ Applies the GPTQ algorithm to quantize the weights of the given model. @@ -99,8 +100,7 @@ def apply( if self._backend_entity is None: self._set_backend_entity(model) - scales = {} - zero_points = {} + res = {} target_nodes = [] target_nodes_wc_params_map = {} @@ -123,10 +123,9 @@ def apply( _, input_tensors = next(iter(inputs.items())) hessian = self._calculate_hessian(node, input_tensors) scale, zero_point = self._quantize_weights(model, graph, wc_params, hessian, input_tensors) - scales[wc_params.weight_name] = scale - zero_points[wc_params.weight_name] = zero_point + res[wc_params.weight_name] = CompressedWeight(None, scale, zero_point, None) - return model, scales, zero_points + return model, res def get_statistic_points( self, @@ -235,7 +234,9 @@ def _quantize_weights( else weight_tensor.shape[1] ) reduction_axes = wc_params.reduction_axes - block_compression_config = WeightCompressionConfig(mode=wc_params.compression_config.mode) + block_compression_config = WeightCompressionConfig( + mode=wc_params.compression_config.mode, codebook_values=wc_params.compression_config.codebook_values + ) damp = self._damp_percent * fns.mean(fns.diag(hessian)) diag_indices = fns.arange(columns, backend=hessian.backend, device=hessian.device) @@ -260,7 +261,7 @@ def _quantize_weights( hessian_diag_val = hessian_inv_block[i, i] if (i1 + i) % group_size == 0: - if block_compression_config.mode == CompressWeightsMode.NF4: + if not block_compression_config.is_integer: scale = calculate_float_quantization_params( weight_tensor[:, (i1 + i) : (i1 + i + group_size)], reduction_axes, block_compression_config ) @@ -284,7 +285,7 @@ def _quantize_weights( scales.append(scale) zero_points.append(zero_point) - if block_compression_config.mode == CompressWeightsMode.NF4: + if not block_compression_config.is_integer: quantized_col = float_quantize_dequantize_weight( fns.unsqueeze(weight_col, 1), block_compression_config, diff --git a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py index 2bac7048639..0e7e1897813 100644 --- a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py @@ -49,7 +49,7 @@ from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm -from nncf.quantization.algorithms.weight_compression.weight_lowering import CompressedWeight +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.algorithms.weight_compression.weight_lowering import compress_weight from nncf.tensor import Tensor from nncf.tensor.definitions import TensorDataType @@ -201,8 +201,7 @@ def transform_model( model: onnx.ModelProto, graph: NNCFGraph, weight_compression_parameters: Iterable[WeightCompressionParameters], - precomputed_scales: 
dict[str, Tensor] = None, - precomputed_zero_points: dict[str, Tensor] = None, + precomputed_compressed_weights: Optional[dict[str, CompressedWeight]] = None, lora_correction_algo: Optional[LoraCorrectionAlgorithm] = None, compression_format: CompressionFormat = CompressionFormat.DQ, advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(), @@ -214,12 +213,12 @@ def transform_model( compression_config = wc_params.compression_config node = wc_params.node_with_weight weight = self.get_weight(node, wc_params.weight_port_id, model, graph) + precomputed_compressed_weights = precomputed_compressed_weights or {} compressed_weight = compress_weight( Tensor(weight), wc_params.reduction_axes, compression_config, - None if precomputed_scales is None else precomputed_scales.get(wc_params.weight_name), - None if precomputed_zero_points is None else precomputed_zero_points.get(wc_params.weight_name), + precomputed_compressed_weights.get(wc_params.weight_name), ) dequantize_block_size = max(compression_config.group_size, 0) # 0 - is no block wise quantization dequantize_axis = ( diff --git a/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py index 7c1838eb8d2..6215fb4b1ee 100644 --- a/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -32,6 +32,7 @@ from nncf.openvino.graph.metatypes.groups import ATOMIC_ACTIVATIONS_OPERATIONS from nncf.openvino.graph.model_transformer import OVModelTransformer from nncf.openvino.graph.node_utils import convert_op +from nncf.openvino.graph.node_utils import create_ov_codebook_subgraph from nncf.openvino.graph.node_utils import create_ov_const_from_tensor from nncf.openvino.graph.node_utils import get_const_value_as_numpy_tensor from nncf.openvino.graph.node_utils import get_const_value_as_ov_tensor @@ -57,6 +58,7 @@ from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.handle_errors import handle_invalid_group_size_error from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.algorithms.weight_compression.weight_lowering import compress_weight from nncf.tensor import Tensor from nncf.tensor.definitions import TensorDataType @@ -217,8 +219,7 @@ def _create_compression_subgraph( weight_port_id: int, const_dtype, should_add_convert_node: bool, - layer_scales: Optional[Tensor] = None, - layer_zero_points: Optional[Tensor] = None, + precomputed_compressed_weight: Optional[CompressedWeight] = None, ): scale_dtype = ov.Type.f16 if compression_config.mode == CompressWeightsMode.NF4: @@ -234,32 +235,47 @@ def _create_compression_subgraph( compression_dtype = ov.Type.i8 elif compression_config.mode == CompressWeightsMode.INT8_ASYM: compression_dtype = ov.Type.u8 + elif compression_config.is_codebook: + compression_dtype = None else: msg = f"{compression_config.mode.value} is not supported." 
raise nncf.ParameterNotSupportedError(msg) original_shape = weight.shape + with disable_results_caching(OV_MODEL_CACHE): compressed_weight = compress_weight( weight, reduction_axes, compression_config, - layer_scales, - layer_zero_points, + precomputed_compressed_weight, ) - compressed_const = create_ov_const_from_tensor( - compressed_weight.tensor, compression_dtype, name=const_node_name - ) - converted_const = opset.convert(compressed_const, ov.Type.f16) - if compressed_weight.zero_point is not None: - zero_point_const = create_ov_const_from_tensor( - compressed_weight.zero_point, compression_dtype, name=f"{const_node_name}/zero_point" + if compression_config.is_codebook: + n_quants = compressed_weight.codebook.size - 1 + compression_dtype = ov.Type.u16 if n_quants > 255 else (ov.Type.u8 if n_quants > 15 else ov.Type.u4) + converted_const = create_ov_codebook_subgraph( + codebook=compressed_weight.codebook + if compression_config.mode == CompressWeightsMode.CODEBOOK + else compressed_weight.codebook.as_openvino_tensor().astype(TensorDataType.f8e4m3), + indexes=compressed_weight.tensor, + dtype=compression_dtype, + name=const_node_name, ) - zero_point_const = opset.convert(zero_point_const, ov.Type.f16) - converted_const = opset.subtract( - converted_const, zero_point_const, name=f"{const_node_name}/zero_point/subtract" + else: + compressed_const = create_ov_const_from_tensor( + compressed_weight.tensor, compression_dtype, name=const_node_name ) + converted_const = opset.convert(compressed_const, ov.Type.f16) + + if compressed_weight.zero_point is not None: + zero_point_const = create_ov_const_from_tensor( + compressed_weight.zero_point, compression_dtype, name=f"{const_node_name}/zero_point" + ) + zero_point_const = opset.convert(zero_point_const, ov.Type.f16) + converted_const = opset.subtract( + converted_const, zero_point_const, name=f"{const_node_name}/zero_point/subtract" + ) scale_const = create_ov_const_from_tensor(compressed_weight.scale, scale_dtype, name=f"{const_node_name}/scale") scale_const = convert_op(scale_const, ov.Type.f16) @@ -282,9 +298,8 @@ def transform_model( model: ov.Model, graph: NNCFGraph, weight_compression_parameters: Iterable[WeightCompressionParameters], - precomputed_scales: dict[str, Tensor] = None, - precomputed_zero_points: dict[str, Tensor] = None, - lora_correction_algo: LoraCorrectionAlgorithm = None, + precomputed_compressed_weights: Optional[dict[str, CompressedWeight]] = None, + lora_correction_algo: Optional[LoraCorrectionAlgorithm] = None, compression_format: CompressionFormat = CompressionFormat.DQ, advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(), ) -> ov.Model: @@ -308,10 +323,6 @@ def transform_model( should_add_convert_node = True break - layer_scales = None if precomputed_scales is None else precomputed_scales.get(wc_params.weight_name) - layer_zero_points = ( - None if precomputed_zero_points is None else precomputed_zero_points.get(wc_params.weight_name) - ) try: mul, compressed_weight = self._create_compression_subgraph( weight=weight, @@ -321,8 +332,9 @@ def transform_model( weight_port_id=wc_params.weight_port_id, const_dtype=const_dtype, should_add_convert_node=should_add_convert_node, - layer_scales=layer_scales, - layer_zero_points=layer_zero_points, + precomputed_compressed_weight=None + if precomputed_compressed_weights is None + else precomputed_compressed_weights.get(wc_params.weight_name), ) except nncf.InvalidGroupSizeError as error: first_caught_error = error diff --git 
a/src/nncf/quantization/algorithms/weight_compression/parameters.py b/src/nncf/quantization/algorithms/weight_compression/parameters.py new file mode 100644 index 00000000000..fb27775997d --- /dev/null +++ b/src/nncf/quantization/algorithms/weight_compression/parameters.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Optional + +from nncf.tensor import Tensor + + +@dataclass +class CompressedWeight: + """ + Compressed weight and decompression parameters. + + :param tensor: The tensor with compressed weight. + :param scale: The decompression scale, in practice it is dequantization scale for the quantization. + :param zero_point: The zero-point, it is the value of the compression type corresponding to the value 0 + in the non-compression realm. Applicable for INT quantization. + :param codebook: The codebook (LUT) for the weight compression. Applicable for vector quantization + """ + + tensor: Optional[Tensor] = None + scale: Optional[Tensor] = None + zero_point: Optional[Tensor] = None + codebook: Optional[Tensor] = None + + def is_codebook(self): + """ + Check if the compressed weight is a codebook. + + :return: True if the compressed weight is a codebook, False otherwise. 
+ """ + return self.codebook is not None and self.tensor is not None and self.scale is not None diff --git a/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py b/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py index 34921ad1563..d7c63c3d1e8 100644 --- a/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py +++ b/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py @@ -18,12 +18,12 @@ from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend from nncf.experimental.common.tensor_statistics.statistics import WCTensorStatistic -from nncf.parameters import CompressWeightsMode from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.handle_errors import handle_invalid_group_size_error +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.algorithms.weight_compression.weight_lowering import do_float_quantization from nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization from nncf.quantization.algorithms.weight_compression.weight_lowering import float_quantize_dequantize_weight @@ -98,7 +98,7 @@ def apply( all_weight_params: list[WeightCompressionParameters], statistics: dict[str, WCTensorStatistic], backend_entity: Optional[WeightCompressionAlgoBackend] = None, - ) -> tuple[dict[str, Tensor], dict[str, Tensor]]: + ) -> dict[str, CompressedWeight]: """ Estimates better scale for the int4 nodes in the model. 
Minimizes per-group difference between floating point MatMul and @@ -118,7 +118,7 @@ def apply( self._backend_entity = backend_entity if self._backend_entity is None: self._set_backend_entity(model) - scales, zero_points = dict(), dict() + res = dict() invalid_node_names = [] first_caught_error = None @@ -128,7 +128,7 @@ def apply( config = wp.compression_config if config.num_bits != 4 or node_name not in statistics: - scales[weight_name] = None + res[weight_name] = CompressedWeight() continue stats = statistics[node_name] @@ -141,7 +141,7 @@ def apply( weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph) try: - scales[weight_name], zero_points[weight_name] = self.calculate_quantization_params( + scale, zero_point = self.calculate_quantization_params( stats, weight, wp.reduction_axes, @@ -151,6 +151,7 @@ def apply( self._scale_steps, self._weight_penalty, ) + res[weight_name] = CompressedWeight(None, scale, zero_point, None) except nncf.InvalidGroupSizeError as error: first_caught_error = error invalid_node_names.append(wp.node_with_weight.node_name) @@ -158,7 +159,7 @@ def apply( if first_caught_error: handle_invalid_group_size_error(first_caught_error, invalid_node_names) - return scales, zero_points + return res @staticmethod def calculate_quantization_params( @@ -211,7 +212,7 @@ def calculate_quantization_params( cur_config.group_size = group_size original_weight = fns.zeros_like(weight) + weight - if config.mode == CompressWeightsMode.NF4: + if not config.is_integer: q_weights, compressed_weights, scale = float_quantize_dequantize_weight( original_weight, cur_config, reduction_axis, return_compressed_weight=True ) @@ -260,7 +261,7 @@ def calculate_quantization_params( near_to_ideal_scale = estimate_scales(original_weight, target, zero_mask, importance) near_to_ideal_scale = near_to_ideal_scale * scale_sign - if config.mode == CompressWeightsMode.NF4: + if not config.is_integer: out = float_quantize_dequantize_weight( original_weight, config, @@ -298,8 +299,8 @@ def calculate_quantization_params( result_scale = near_to_ideal_scale if i < initial_steps - 1: - if config.mode == CompressWeightsMode.NF4: - out, _ = do_float_quantization(original_weight, config, precomputed_scale=near_to_ideal_scale) + if not config.is_integer: + out, _, _ = do_float_quantization(original_weight, config, precomputed_scale=near_to_ideal_scale) else: out, _, _ = do_integer_quantization( original_weight, @@ -316,8 +317,8 @@ def calculate_quantization_params( factor = 1.0 - 0.05 * scale_step scaled_scale = factor * scale - if config.mode == CompressWeightsMode.NF4: - out, _ = do_float_quantization(original_weight, config, precomputed_scale=scaled_scale) + if not config.is_integer: + out, _, _ = do_float_quantization(original_weight, config, precomputed_scale=scaled_scale) else: out, _, _ = do_integer_quantization( original_weight, @@ -332,7 +333,7 @@ def calculate_quantization_params( near_to_ideal_scale = estimate_scales(original_weight, target, zero_mask, importance) near_to_ideal_scale = near_to_ideal_scale * scale_sign - if config.mode == CompressWeightsMode.NF4: + if not config.is_integer: out = float_quantize_dequantize_weight(original_weight, config, precomputed_scale=near_to_ideal_scale) else: out = integer_quantize_dequantize_weight( diff --git a/src/nncf/quantization/algorithms/weight_compression/torch_backend.py b/src/nncf/quantization/algorithms/weight_compression/torch_backend.py index 13e6abc751a..7e5c348f3a9 100644 --- 
a/src/nncf/quantization/algorithms/weight_compression/torch_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/torch_backend.py @@ -48,7 +48,7 @@ from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.handle_errors import handle_invalid_group_size_error from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm -from nncf.quantization.algorithms.weight_compression.weight_lowering import CompressedWeight +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.algorithms.weight_compression.weight_lowering import compress_weight from nncf.tensor import Tensor from nncf.tensor.definitions import TensorDataType @@ -456,9 +456,8 @@ def transform_model( model: Union[GraphModelWrapper, torch.nn.Module], graph: NNCFGraph, weight_compression_parameters: Iterable[WeightCompressionParameters], - precomputed_scales: dict[str, Tensor] = None, - precomputed_zero_points: dict[str, Tensor] = None, - lora_correction_algo: LoraCorrectionAlgorithm = None, + precomputed_compressed_weights: Optional[dict[str, CompressedWeight]] = None, + lora_correction_algo: Optional[LoraCorrectionAlgorithm] = None, compression_format: CompressionFormat = CompressionFormat.DQ, advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(), ) -> NNCFNetwork: @@ -489,13 +488,13 @@ def transform_model( raise nncf.InternalError(msg) try: + precomputed_compressed_weights = precomputed_compressed_weights or {} # calculates compressed weights and decompression parameters compressed_weight = compress_weight( Tensor(weight), wc_params.reduction_axes, compression_config, - None if precomputed_scales is None else precomputed_scales.get(wc_params.weight_name), - None if precomputed_zero_points is None else precomputed_zero_points.get(wc_params.weight_name), + precomputed_compressed_weights.get(wc_params.weight_name), ) except nncf.InvalidGroupSizeError as error: first_caught_error = error diff --git a/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py b/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py index 2650f16600c..396f125ca7b 100644 --- a/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py @@ -43,6 +43,7 @@ from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters from nncf.quantization.algorithms.weight_compression.handle_errors import handle_invalid_group_size_error from nncf.quantization.algorithms.weight_compression.lora_correction import LoraCorrectionAlgorithm +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.algorithms.weight_compression.torch_backend import PTAWQAlgoAlgoBackend from nncf.quantization.algorithms.weight_compression.torch_backend import PTMixedPrecisionAlgoBackend from nncf.quantization.algorithms.weight_compression.torch_backend import PTWeightCompressionAlgoBackend @@ -189,9 +190,8 @@ def transform_model( model: torch.fx.GraphModule, graph: NNCFGraph, weight_compression_parameters: Iterable[WeightCompressionParameters], - precomputed_scales: dict[str, Tensor] = None, - precomputed_zero_points: dict[str, Tensor] = None, - lora_correction_algo: LoraCorrectionAlgorithm = None, + precomputed_compressed_weights: Optional[dict[str, CompressedWeight]] = 
None, + lora_correction_algo: Optional[LoraCorrectionAlgorithm] = None, compression_format: CompressionFormat = CompressionFormat.DQ, advanced_parameters: AdvancedCompressionParameters = AdvancedCompressionParameters(), ) -> torch.fx.GraphModule: @@ -218,8 +218,9 @@ def transform_model( weight, wc_params.reduction_axes, compression_config, - None if precomputed_scales is None else precomputed_scales.get(wc_params.weight_name), - None if precomputed_zero_points is None else precomputed_zero_points.get(wc_params.weight_name), + None + if precomputed_compressed_weights is None + else precomputed_compressed_weights.get(wc_params.weight_name), ) except nncf.InvalidGroupSizeError as error: first_caught_error = error diff --git a/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py index 12522e0fa60..8a1ee8f9b40 100644 --- a/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py +++ b/src/nncf/quantization/algorithms/weight_compression/weight_lowering.py @@ -9,17 +9,19 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from dataclasses import dataclass from typing import Optional, Union -import numpy as np - import nncf from nncf.common.logging.logger import nncf_logger from nncf.common.utils.backend import is_openvino_at_least from nncf.common.utils.backend import is_openvino_available +from nncf.errors import InvalidGroupSizeError +from nncf.errors import UnsupportedModelError from nncf.parameters import CompressWeightsMode from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig +from nncf.quantization.algorithms.weight_compression.constants import CENTER_OF_NF4_QUANTILES +from nncf.quantization.algorithms.weight_compression.constants import NF4_QUANTILES +from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight from nncf.quantization.fake_quantize import calculate_scale_zero_point from nncf.tensor import Tensor from nncf.tensor import functions as fns @@ -28,69 +30,9 @@ ReductionAxes = Union[int, tuple[int, ...]] -NF4_QUANTILES = np.array( - [ - -1.0, - -0.6961928009986877, - -0.5250730514526367, - -0.39491748809814453, - -0.28444138169288635, - -0.18477343022823334, - -0.09105003625154495, - 0.0, - 0.07958029955625534, - 0.16093020141124725, - 0.24611230194568634, - 0.33791524171829224, - 0.44070982933044434, - 0.5626170039176941, - 0.7229568362236023, - 1.0, - ], - dtype=np.float32, -) - -CENTER_OF_NF4_QUANTILES = np.array( - [ - -0.84809643, - -0.6106329, - -0.45999527, - -0.33967942, - -0.2346074, - -0.13791174, - -0.045525018, - 0.03979015, - 0.120255254, - 0.20352125, - 0.29201376, - 0.38931254, - 0.5016634, - 0.6427869, - 0.8614784, - ], - dtype=np.float32, -) - - MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION = 10000 -@dataclass -class CompressedWeight: - """ - Compressed weight and decompression parameters. - - :param tensor: The tensor with compressed weight. - :param scale: The decompression scale, in practice it is dequantization scale for the INT quantization. - :param zero_point: The zero-point, it is the value of the compression type corresponding to the value 0 - in the non-compression realm. Applicable for INT quantization. 
- """ - - tensor: Tensor - scale: Tensor - zero_point: Optional[Tensor] = None - - def reshape_weight_for_grouped_quantization( weight: Tensor, reduction_axes: ReductionAxes, group_size: int ) -> tuple[Tensor, int]: @@ -109,11 +51,11 @@ def reshape_weight_for_grouped_quantization( reduction_axes = reduction_axes[0] if not isinstance(reduction_axes, int): msg = f"Group-wise quantization expects a single reduction axis, but given: {reduction_axes}." - raise nncf.UnsupportedModelError(msg) + raise UnsupportedModelError(msg) channel_size = weight.shape[reduction_axes] if channel_size % group_size != 0: msg = f"Channel size {channel_size} should be divisible by size of group {group_size}." - raise nncf.InvalidGroupSizeError(msg) + raise InvalidGroupSizeError(msg) num_groups_per_channel = channel_size // group_size shape = list(weight.shape) # [a1, r, a2] - "r" refers to number of channels along reduction axis @@ -124,7 +66,7 @@ def reshape_weight_for_grouped_quantization( def calculate_float_quantization_params( - weight: Tensor, reduction_axes: ReductionAxes, config: WeightCompressionConfig, max_val=6.0 + weight: Tensor, reduction_axes: ReductionAxes, config: WeightCompressionConfig ) -> Tensor: """ Calculates the scale for nf4 or e2m1 quantization. @@ -132,22 +74,23 @@ def calculate_float_quantization_params( :param weight: Weight array to compress. :param reduction_axes: Axes along which to reduce (collect) different statistics (e.g., min, max). :param config: Weight compression configuration. - :param max_val: Maximal value of e2m1 type. :return: Scale tensor of float32 type for float quantization. """ - assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1] + assert not config.is_integer if weight.dtype != TensorDataType.float32: weight = weight.astype(TensorDataType.float32) scale = fns.max(fns.abs(weight), axis=reduction_axes, keepdims=True) + if config.mode in [CompressWeightsMode.E2M1, CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]: + max_val = 6.0 if config.mode == CompressWeightsMode.E2M1 else fns.max(fns.abs(config.get_numpy_codebook())) + scale = scale / max_val # NOTE: adding machine epsilon to avoid division by zero eps = fns.finfo(weight).eps scale = fns.where(fns.abs(scale) < eps, eps, scale) if config.mode == CompressWeightsMode.E2M1: - scale = scale / max_val scale = fns.log2(scale) scale = fns.ceil(scale) scale = fns.clip(scale, -127, 127) @@ -177,20 +120,21 @@ def do_float_quantization( config: WeightCompressionConfig, reduction_axes: Optional[ReductionAxes] = None, precomputed_scale: Optional[Tensor] = None, -) -> tuple[Tensor, Tensor]: +) -> tuple[Tensor, Tensor, Tensor]: """ Computes quantization scale if not provided, and performs corresponding (nf4, e2m1) weight quantization. For NF4 quantization quantizes the weights to 16 levels on [-1, 1] interval. - For E2M1 currently returns normalized weight without quantization. + For E2M1 and CODEBOOK currently returns normalized weight without quantization. TODO(nikita-savelyevv): add support for E2M1 once ticket 164851 is resolved :param weight: Weight array to compress. :param config: Weight compression configuration. :param reduction_axes: Axes, along which to reduce (collect) different statistics. :param precomputed_scale: Optional precomputed scale. - :return: Returns quantized (for e2m1 normalized) weight tensor and corresponding scale tensor. + :return: Returns quantized (for e2m1 normalized) weight tensor and corresponding scale tensor and + optional indexes for codebook. 
""" - assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1] + assert not config.is_integer if config.group_size != -1 and reduction_axes is not None: # weights are reshaped: [a1, r, a2] -> [a1, r//gs, gs, a2] @@ -218,10 +162,15 @@ def do_float_quantization( compressed_weight = norm_weight.as_openvino_tensor().astype(TensorDataType.nf4) else: compressed_weight = _calculate_nf4_quantized_weight(norm_weight) + elif config.is_codebook: + compressed_weight, indexes = _calculate_codebook_quantized_weight( + norm_weight, quantiles=config.get_numpy_codebook() + ) + return compressed_weight, scale, indexes else: # TODO(nikita-savelyevv): add support for E2M1 once ticket 164851 is resolved compressed_weight = norm_weight - return compressed_weight, scale + return compressed_weight, scale, None def float_quantize_dequantize_weight( @@ -242,11 +191,11 @@ def float_quantize_dequantize_weight( :param return_compressed_weight: If True, besides decompressed weight will also return compressed weight and scale. :return: Dequantized weight tensor or a tuple containing the decompressed weight, compressed weight and scale. """ - assert config.mode == CompressWeightsMode.NF4 + assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3] # TODO(nikita-savelyevv): add support for f4e2m1 once ticket 164851 is resolved # Optimized implementation - if _can_run_optimized(weight): + if config.mode == CompressWeightsMode.NF4 and _can_run_optimized(weight): from nncf.openvino.optimized_functions import ( float_quantize_dequantize_weight as float_quantize_dequantize_weight_ov, ) @@ -260,7 +209,7 @@ def float_quantize_dequantize_weight( ) # Reference implementation - compressed_weight, scale = do_float_quantization(weight, config, reduction_axes, precomputed_scale) + compressed_weight, scale, _ = do_float_quantization(weight, config, reduction_axes, precomputed_scale) decompressed_weight = do_float_dequantization(compressed_weight, scale) if return_compressed_weight: return decompressed_weight, compressed_weight, scale @@ -350,8 +299,7 @@ def compress_weight( weight: Tensor, reduction_axes: ReductionAxes, config: WeightCompressionConfig, - precomputed_scale: Tensor = None, - precomputed_zero_point: Tensor = None, + precomputed_compressed_weight: CompressedWeight = None, ) -> CompressedWeight: """ Compress weight using compression configuration. @@ -359,13 +307,26 @@ def compress_weight( :param weight: The weight to compress. :param reduction_axes: Axes, along which to reduce (collect) different statistics (e.g. min, max). :param config: Compression configuration. - :param precomputed_scale: Precomputed scale. - :param precomputed_zero_point: Precomputed zero point. + :param precomputed_compressed_weight: Contains precomputed scale and zero point. 
:return: The compressed weight and decompression parameters as instance of CompressedWeight """ + precomputed_scale, precomputed_zero_point = ( + (precomputed_compressed_weight.scale, precomputed_compressed_weight.zero_point) + if precomputed_compressed_weight + else (None, None) + ) + if not config.is_integer: - compressed_weight, scale = do_float_quantization(weight, config, reduction_axes, precomputed_scale) - return CompressedWeight(compressed_weight, scale) + compressed_weight, scale, indexes = do_float_quantization(weight, config, reduction_axes, precomputed_scale) + if indexes is not None: + return CompressedWeight( + indexes, + scale, + None, + config.codebook_values, + ) + else: + return CompressedWeight(compressed_weight, scale) compressed_weight, scale, zero_point = do_integer_quantization( weight, config, reduction_axes, precomputed_scale, precomputed_zero_point ) @@ -537,6 +498,32 @@ def _calculate_nf4_quantized_weight(norm_weight: Tensor) -> Tensor: return quantized_weight +def _calculate_codebook_quantized_weight( + norm_weight: Tensor, quantiles: Tensor = None, center_of_quantiles: Tensor = None +) -> tuple[Tensor, Tensor]: + """ + Performs quantization by quantiles (if center_of_quantiles is None). Look-up table is used to + "round" or "quantize" to the closest quant. + + :param norm_weight: Weight tensor to quantize already normalized to quantiles range. + :param quantiles: Quantiles to use for quantization. If None, the center_of_quantiles must be provided. + :param center_of_quantiles: Center of quantiles to use for quantization. If None, it is calculated as the average + of adjacent quantiles. + :return: Tensor with floating-point values, where each of them corresponds to elements from quantiles. + """ + assert quantiles is not None or center_of_quantiles is not None, ( + "Either quantiles or center_of_quantiles should be provided" + ) + + if center_of_quantiles is None: + center_of_quantiles = 0.5 * (quantiles[1:] + quantiles[:-1]) + center_of_quantiles = fns.from_numpy(center_of_quantiles, backend=norm_weight.backend) + indexes = fns.searchsorted(center_of_quantiles, norm_weight) + quantiles = fns.from_numpy(quantiles, backend=indexes.backend) + quantized_weight = quantiles[indexes] + return quantized_weight, indexes + + def _calculate_normalized_weight(weight: Tensor, scale: Tensor) -> Tensor: """ Normalizes the weight tensor using the provided scale. diff --git a/src/nncf/quantization/quantize_model.py b/src/nncf/quantization/quantize_model.py index f5435197e3c..340f5983f2b 100644 --- a/src/nncf/quantization/quantize_model.py +++ b/src/nncf/quantization/quantize_model.py @@ -516,8 +516,13 @@ def compress_weights( from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.quantization.quantize_model import compress_weights_impl as pt_compression_weights_impl - if mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]: - msg = "Torch backend does not support NF4 and E2M1 modes for weight compression." + if mode in [ + CompressWeightsMode.NF4, + CompressWeightsMode.E2M1, + CompressWeightsMode.CODEBOOK, + CompressWeightsMode.CB4_F8E4M3, + ]: + msg = "Torch backend does not support NF4, E2M1 and CODEBOOK modes for weight compression." 
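+            # NOTE: codebook-based modes (CODEBOOK, CB4_F8E4M3) are currently produced only for the OpenVINO backend.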
raise nncf.ParameterNotSupportedError(msg) options = {"gptq": gptq, "lora_correction": lora_correction} @@ -560,8 +565,13 @@ def compress_weights( compress_weights_impl as fx_compression_weights_impl, ) - if mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]: - msg = "Torch backend does not support NF4 and E2M1 modes for weight compression." + if mode in [ + CompressWeightsMode.NF4, + CompressWeightsMode.E2M1, + CompressWeightsMode.CODEBOOK, + CompressWeightsMode.CB4_F8E4M3, + ]: + msg = "Torch backend does not support NF4, E2M1 and CODEBOOK modes for weight compression." raise nncf.ParameterNotSupportedError(msg) options = { @@ -614,8 +624,13 @@ def compress_weights( elif backend == BackendType.ONNX: from nncf.onnx.quantization.quantize_model import compress_weights_impl as onnx_compress_weights_impl - if mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]: - msg = "ONNX backend does not support NF4 and E2M1 modes for weight compression." + if mode in [ + CompressWeightsMode.NF4, + CompressWeightsMode.E2M1, + CompressWeightsMode.CODEBOOK, + CompressWeightsMode.CB4_F8E4M3, + ]: + msg = "ONNX backend does not support NF4, E2M1 and CODEBOOK modes for weight compression." raise nncf.ParameterNotSupportedError(msg) options = { diff --git a/tests/cross_fw/examples/example_scope.json b/tests/cross_fw/examples/example_scope.json index fa2080ba064..e027db051c2 100644 --- a/tests/cross_fw/examples/example_scope.json +++ b/tests/cross_fw/examples/example_scope.json @@ -283,6 +283,23 @@ ] } }, + "codebook_llm_compression": { + "backend": "openvino", + "requirements": "examples/llm_compression/openvino/smollm2_360m_codebook/requirements.txt", + "cpu": "Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz", + "accuracy_metrics": { + "answers": [ + "Paris.", + "Mont Blanc.", + "Toronto.", + "Tokyo.", + "Paris.", + "Mont Blanc.", + "Toronto.", + "Fukuoka." 
+ ] + } + }, "llm_compression_distillation_qat_with_lora": { "backend": "torch", "device": "cuda", diff --git a/tests/cross_fw/examples/run_example.py b/tests/cross_fw/examples/run_example.py index 2931fba1186..c9ead09f7bf 100644 --- a/tests/cross_fw/examples/run_example.py +++ b/tests/cross_fw/examples/run_example.py @@ -200,6 +200,12 @@ def fp8_llm_quantization() -> dict[str, float]: return {"answers": list(result.values())} +def codebook_llm_compression() -> list[str]: + from examples.llm_compression.openvino.smollm2_360m_codebook.main import main as codebook_llm_compression_main + + return codebook_llm_compression_main() + + def llm_compression_distillation_qat_with_lora() -> float: from examples.llm_compression.torch.distillation_qat_with_lora.main import main as distillation_qat_with_lora_main diff --git a/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_f16_u4.json b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_f16_u4.json new file mode 100644 index 00000000000..578b2cc53d3 --- /dev/null +++ b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_f16_u4.json @@ -0,0 +1,61 @@ +{ + "matmul_2_data_nncf_codebook_idxs": { + "indexes": [ + 171, + 253, + 154, + 172, + 217, + 235, + 250, + 155, + 253, + 252, + 188, + 253, + 207, + 206, + 253, + 236, + 254, + 233, + 255, + 248, + 255 + ] + }, + "matmul_2_data": { + "scale": [ + [ + [ + 0.99560546875 + ] + ], + [ + [ + 1.177734375 + ] + ], + [ + [ + 1.193359375 + ] + ], + [ + [ + 1.244140625 + ] + ], + [ + [ + 1.1650390625 + ] + ], + [ + [ + 1.2265625 + ] + ] + ] + } +} \ No newline at end of file diff --git a/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_f8e4m3_u8.json b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_f8e4m3_u8.json new file mode 100644 index 00000000000..abf99c05ca4 --- /dev/null +++ b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_f8e4m3_u8.json @@ -0,0 +1,106 @@ +{ + "matmul_2_data_nncf_codebook_idxs": { + "indexes": [ + [ + [ + 14, + 12, + 16, + 20, + 13, + 11, + 15 + ] + ], + [ + [ + 12, + 11, + 16, + 13, + 17, + 12, + 20 + ] + ], + [ + [ + 14, + 11, + 17, + 20, + 15, + 20, + 15 + ] + ], + [ + [ + 14, + 16, + 20, + 19, + 15, + 18, + 15 + ] + ], + [ + [ + 16, + 18, + 14, + 18, + 18, + 20, + 11 + ] + ], + [ + [ + 17, + 19, + 20, + 10, + 19, + 20, + 20 + ] + ] + ] + }, + "matmul_2_data": { + "scale": [ + [ + [ + 0.2275390625 + ] + ], + [ + [ + 0.269287109375 + ] + ], + [ + [ + 0.272705078125 + ] + ], + [ + [ + 0.284423828125 + ] + ], + [ + [ + 0.266357421875 + ] + ], + [ + [ + 0.2802734375 + ] + ] + ] + } +} \ No newline at end of file diff --git a/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_i8_u8.json b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_i8_u8.json new file mode 100644 index 00000000000..acf5ad93048 --- /dev/null +++ b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_i8_u8.json @@ -0,0 +1,106 @@ +{ + "matmul_2_data_nncf_codebook_idxs": { + "indexes": [ + [ + [ + 14, + 12, + 16, + 20, + 13, + 11, + 15 + ] + ], + [ + [ + 12, + 11, + 16, + 13, + 17, + 12, + 20 + ] + ], + [ + [ + 14, + 11, + 17, + 20, + 15, + 20, + 15 + ] + ], + [ + [ + 14, + 16, + 20, + 20, + 15, + 18, + 15 + ] + ], + [ + [ + 16, + 18, + 14, + 18, + 18, + 20, + 11 + ] + ], + [ + [ + 17, + 19, + 20, + 10, + 19, + 20, + 20 + ] + ] + ] + }, + "matmul_2_data": { + "scale": [ + [ + [ + 0.07965087890625 + ] + ], + [ + [ + 
0.09423828125 + ] + ], + [ + [ + 0.095458984375 + ] + ], + [ + [ + 0.0994873046875 + ] + ], + [ + [ + 0.09320068359375 + ] + ], + [ + [ + 0.09814453125 + ] + ] + ] + } +} \ No newline at end of file diff --git a/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_u8_u4.json b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_u8_u4.json new file mode 100644 index 00000000000..8642e52a868 --- /dev/null +++ b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_codebook_u8_u4.json @@ -0,0 +1,61 @@ +{ + "matmul_2_data_nncf_codebook_idxs": { + "indexes": [ + 54, + 248, + 20, + 56, + 145, + 181, + 243, + 38, + 250, + 247, + 104, + 249, + 126, + 123, + 217, + 199, + 251, + 178, + 254, + 208, + 255 + ] + }, + "matmul_2_data": { + "scale": [ + [ + [ + 0.0531005859375 + ] + ], + [ + [ + 0.06280517578125 + ] + ], + [ + [ + 0.06365966796875 + ] + ], + [ + [ + 0.06634521484375 + ] + ], + [ + [ + 0.0621337890625 + ] + ], + [ + [ + 0.0654296875 + ] + ] + ] + } +} \ No newline at end of file diff --git a/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4_f8e4m3.json b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4_f8e4m3.json new file mode 100644 index 00000000000..b8712bf3839 --- /dev/null +++ b/tests/openvino/native/data/2025.2/reference_scales/IntegerModel_compressed_weights_cb4_f8e4m3.json @@ -0,0 +1,178 @@ +{ + "matmul_2_data": { + "scale": [ + [ + [ + 0.2275390625 + ] + ], + [ + [ + 0.269287109375 + ] + ], + [ + [ + 0.272705078125 + ] + ], + [ + [ + 0.284423828125 + ] + ], + [ + [ + 0.266357421875 + ] + ], + [ + [ + 0.2802734375 + ] + ] + ] + }, + "matmul_1_data": { + "compressed_weight": [ + [ + 119, + 168, + 11, + 49, + 255, + 255 + ], + [ + 255, + 159, + 255, + 255, + 255, + 255 + ], + [ + 255, + 169, + 59, + 255, + 228, + 135 + ], + [ + 202, + 255, + 255, + 149, + 238, + 134 + ], + [ + 229, + 130, + 151, + 255, + 87, + 240 + ], + [ + 26, + 255, + 245, + 75, + 255, + 18 + ] + ], + "zero_point": [ + [ + 0 + ], + [ + 0 + ], + [ + 0 + ], + [ + 0 + ], + [ + 0 + ], + [ + 0 + ] + ], + "scale": [ + [ + 0.0025196075439453125 + ], + [ + 0.0024051666259765625 + ], + [ + 0.002300262451171875 + ], + [ + 0.0024013519287109375 + ], + [ + 0.0025997161865234375 + ], + [ + 0.003208160400390625 + ] + ] + }, + "gather_2_data": { + "compressed_weight": [ + [ + 181, + 77, + 12, + 5, + 231, + 255 + ], + [ + 166, + 200, + 149, + 255, + 223, + 1 + ], + [ + 255, + 10, + 224, + 54, + 255, + 166 + ] + ], + "zero_point": [ + [ + 0 + ], + [ + 0 + ], + [ + 0 + ] + ], + "scale": [ + [ + 0.0035152435302734375 + ], + [ + 0.0036563873291015625 + ], + [ + 0.003253936767578125 + ] + ] + } +} \ No newline at end of file diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index 2e15dc9a0c5..936d5d53329 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -47,6 +47,7 @@ from nncf.quantization.algorithms.weight_compression.weight_lowering import MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION from nncf.quantization.algorithms.weight_compression.weight_lowering import _calculate_nf4_quantized_weight from nncf.quantization.algorithms.weight_compression.weight_lowering import _calculate_normalized_weight +from nncf.quantization.algorithms.weight_compression.weight_lowering import do_float_quantization from 
nncf.quantization.algorithms.weight_compression.weight_lowering import do_integer_quantization from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error from nncf.quantization.algorithms.weight_compression.weight_lowering import reshape_weight_for_grouped_quantization @@ -229,6 +230,57 @@ def check_nf4_grouped(op: ov.Node, group_size: int = 7): } +def check_codebook_grouped(op: ov.Node, group_size: int = 7, dtype=ov.Type.f8e4m3): + assert op.get_element_type() == dtype + + if dtype == ov.Type.f16: + convert_node = op + else: + convert_node = get_next_node(op) + assert convert_node.get_type_name() == "Convert" + + gather_node = get_next_node(convert_node) + assert gather_node.get_type_name() == "Gather" + + weight_shape = gather_node.shape + # NOTE: get_const_value_as_numpy_tensor doesn't work for 4-bit types + assert list(weight_shape)[-1] == group_size + reduced_weight_shape = list(weight_shape) + reduced_weight_shape[-1] = 1 + + mul_node = get_next_node(gather_node) + assert mul_node.get_type_name() == "Multiply" + scale_node = mul_node.input_value(1).get_node() + assert list(scale_node.shape) == reduced_weight_shape + + reshape_node = get_next_node(mul_node) + assert reshape_node.get_type_name() == "Reshape" + + convert_node = get_next_node(reshape_node) + assert convert_node.get_type_name() == "Convert" + + return { + "scale": get_const_value_as_numpy_tensor(scale_node), + } + + +def check_codebook_indexes(op: ov.Node, dtype=ov.Type.u4): + assert op.get_element_type() == dtype + + if dtype == ov.Type.u4: + convert_node = get_next_node(op) + assert convert_node.get_type_name() == "Convert" + else: + convert_node = op + + gather_node = get_next_node(convert_node) + assert gather_node.get_type_name() == "Gather" + + return { + "indexes": get_const_value_as_numpy_tensor(op), + } + + def check_int4_sym_grouped(op: ov.Node): return check_int4_grouped(op, mode=CompressWeightsMode.INT4_SYM) @@ -256,6 +308,7 @@ def get_mixed_mapping(primary_fn: Callable, list_layers: list[str]): (CompressWeightsMode.INT4_SYM, 7, get_mixed_mapping(check_int4_sym_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.INT4_ASYM, 7, get_mixed_mapping(check_int4_asym_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.NF4, 7, get_mixed_mapping(check_nf4_grouped, TEST_MODELS[IntegerModel])), + (CompressWeightsMode.CB4_F8E4M3, 7, get_mixed_mapping(check_codebook_grouped, TEST_MODELS[IntegerModel])), ), ) def test_compare_compressed_weights(mode, group_size, check_fn_per_node_map): @@ -279,6 +332,56 @@ def test_compare_compressed_weights(mode, group_size, check_fn_per_node_map): compare_stats(ref_stats, actual_stats) +@pytest.mark.parametrize( + "codebook, codebook_dtype, index_dtype, name", + [ + (np.array([i for i in range(16)], np.uint8), ov.Type.u8, ov.Type.u4, "u8_u4"), + (np.array([0.1 * i for i in range(-8, 8)], np.float16), ov.Type.f16, ov.Type.u4, "f16_u4"), + ( + Tensor(np.array([0.35 * i for i in range(-10, 11)], np.float16)) + .as_openvino_tensor() + .astype(TensorDataType.f8e4m3), + ov.Type.f8e4m3, + ov.Type.u8, + "f8e4m3_u8", + ), + ( + Tensor(np.array([i for i in range(-10, 11)], np.int8)).as_openvino_tensor().astype(TensorDataType.int8), + ov.Type.i8, + ov.Type.u8, + "i8_u8", + ), + ], +) +def test_codebook_compression_for_different_dtypes(codebook, codebook_dtype, index_dtype, name): + model = IntegerModel().ov_model + + compressed_model = compress_weights( + model, + mode=CompressWeightsMode.CODEBOOK, + group_size=7, + 
advanced_parameters=nncf.AdvancedCompressionParameters(codebook=codebook), + ) + actual_stats = {} + for op in compressed_model.get_ops(): + op_name = op.get_friendly_name() + if op.get_type_name() == "Constant": + if op_name == "matmul_2_data": + actual_stats[op_name] = check_codebook_grouped(op, group_size=7, dtype=codebook_dtype) + elif op_name == "matmul_2_data_nncf_codebook_idxs": + actual_stats[op_name] = check_codebook_indexes(op, dtype=index_dtype) + + ref_stats_path = get_actual_reference_for_current_openvino( + REFERENCE_SCALES_DIR / f"IntegerModel_codebook_{name}.json" + ) + + if os.getenv("NNCF_TEST_REGEN_DOT") is not None: + dump_to_json(ref_stats_path, actual_stats) + + ref_stats = load_json(ref_stats_path) + compare_stats(ref_stats, actual_stats) + + @pytest.mark.parametrize("metric", DATA_BASED_SENSITIVITY_METRICS) def test_gather_in_4_bit_if_all_layers_with_data(metric): dim1 = 2 # sequence length dimension @@ -1024,6 +1127,76 @@ def test_mixed_precision_e2m1(mode, all_layers, ratio, ref_ids): assert ref_e8m0_nodes == names_e8m0 +@pytest.mark.parametrize( + ("mode", "all_layers", "ratio", "ref_ids"), + ( + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 1, 5), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.8, 3), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.4, 1), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, True, 0.2, 0), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 1, 4), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.8, 3), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.4, 1), + (SensitivityMetric.WEIGHT_QUANTIZATION_ERROR, False, 0.2, 0), + ), +) +def test_mixed_precision_codebook(mode, all_layers, ratio, ref_ids): + model = SequentialMatmulModel().ov_model + compressed_model = compress_weights( + model, + mode=CompressWeightsMode.CB4_F8E4M3, + ratio=ratio, + group_size=1, + all_layers=all_layers, + sensitivity_metric=mode, + ) + names_codebook = { + op.get_friendly_name() for op in compressed_model.get_ordered_ops() if op.get_element_type() == ov.Type.f8e4m3 + } + + assert ref_ids == len(names_codebook) + + +@pytest.mark.parametrize( + ("codebook", "dst_type", "n_layers"), + ( + (np.array([i for i in range(-8, 8)], np.int8), ov.Type.i8, 5), + (np.array([i for i in range(-(2**6), 2**6)], np.int8), ov.Type.i8, 5), + ( + Tensor(np.array([np.sign(i) * 2 ** np.abs(i) for i in range(-6, 6)])) + .as_openvino_tensor() + .astype(TensorDataType.f8e4m3), + ov.Type.f8e4m3, + 5, + ), + ), +) +@pytest.mark.parametrize("group_size", (1, -1)) +def test_codebook(codebook, n_layers, dst_type, group_size): + model = SequentialMatmulModel().ov_model + compressed_model = compress_weights( + model, + mode=CompressWeightsMode.CODEBOOK, + ratio=1.0, + group_size=group_size, + all_layers=True, + advanced_parameters=AdvancedCompressionParameters(codebook=codebook), + ) + names_codebook = [ + op.get_friendly_name() + for op in compressed_model.get_ordered_ops() + if op.get_friendly_name().endswith("nncf_codebook") + ] + + assert len(names_codebook) == n_layers + + names_codebook = [ + op.get_friendly_name() for op in compressed_model.get_ordered_ops() if op.get_element_type() == dst_type + ] + + assert len(names_codebook) == n_layers + + @pytest.mark.parametrize( ("mode", "data"), ( @@ -1045,6 +1218,30 @@ def test_compressed_weighs_range(mode, data): assert np.allclose(np.abs(compressed_weighs.data), np.abs(w.data)) +@pytest.mark.parametrize( + ("data"), + ( + ([-8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0]), + ([0.0, 1.0, 2.0, 
3.0, 4.0, 5.0, 6.0, 7.0, 8.0]), + ([-8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]), + ([-1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5]), + ), +) +def test_codebook_weighs_range(data): + data = np.array(data).astype(np.float32) + codebook = data + max_diff = 0.1 + w = Tensor(data + (np.random.rand(*data.shape) - 0.5) * max_diff) + config = WeightCompressionConfig(mode=CompressWeightsMode.CODEBOOK, codebook_values=Tensor(data)) + _, scale, indexes = do_float_quantization(w, config, -1) + uncompressed_data = codebook[indexes.data] * scale.data + + indexes = indexes.flatten() + target = np.arange(indexes.shape[0]) + assert np.allclose(indexes.data, target) + assert np.all(np.abs(uncompressed_data.data - data) <= max_diff) + + @pytest.mark.parametrize( ("config", "precompute_scale", "precompute_zero_point", "raises"), [ @@ -1528,6 +1725,28 @@ def test_nf4_quantization_mid_quant(weight, scale): np.testing.assert_allclose(nf4_quant.data, ref_nf4_quant.data, atol=0, rtol=0) +@pytest.mark.parametrize( + "codebook", + [ + np.array([0.2, 0.2, 0.3, 0.4], dtype=np.float32), + np.array([0.5, 0.2, 0.3, 0.4], dtype=np.float32), + np.array([[-1, 0, 1, 2, 3], [-1, 0, 1, 2, 3]], dtype=np.float32), + np.array([5], dtype=np.float32), + ], +) +def test_codebook_is_correct_array(codebook): + model = SequentialMatmulModel().ov_model + + # The codebook should be a non empty 1D numpy array and sorted + with pytest.raises(nncf.ValidationError): + compress_weights( + model, + mode=CompressWeightsMode.CODEBOOK, + group_size=-1, + advanced_parameters=nncf.AdvancedCompressionParameters(codebook=codebook), + ) + + class TestOVTemplateWeightCompression(TemplateWeightCompression): @staticmethod def get_matmul_model() -> ov.Model: diff --git a/tests/openvino/optimized_functions/test_compression_functions.py b/tests/openvino/optimized_functions/test_compression_functions.py index fcbb127c8d3..67a9fcef14d 100644 --- a/tests/openvino/optimized_functions/test_compression_functions.py +++ b/tests/openvino/optimized_functions/test_compression_functions.py @@ -219,7 +219,7 @@ def test_quantization_alignment(weight_shape, config, quantization_task, tensor_ if config.is_integer: compressed_weight, scale, zero_point = outputs else: - compressed_weight, scale = outputs + compressed_weight, scale, _ = outputs elif quantization_task == QuantizationTask.Q_DQ: decompressed_weight = outputs else:
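Reviewer note: below is a minimal NumPy sketch of the nearest-entry lookup performed by the new `_calculate_codebook_quantized_weight` helper (midpoints of a sorted codebook as decision boundaries, then `searchsorted`). The codebook and weight values are illustrative only, not taken from the tests, and the actual implementation operates on NNCF tensors via `fns.searchsorted` / `fns.from_numpy` rather than raw NumPy arrays.

```python
import numpy as np

# Illustrative sorted 1D codebook and already-normalized weights (hypothetical values,
# standing in for config.get_numpy_codebook() and the scaled weight tensor).
codebook = np.array([-1.0, -0.5, 0.0, 0.5, 1.0], dtype=np.float32)
norm_weight = np.array([[-0.9, -0.1], [0.3, 0.8]], dtype=np.float32)

# Midpoints between adjacent codebook entries act as decision boundaries
# (the "center_of_quantiles" in the helper).
centers = 0.5 * (codebook[1:] + codebook[:-1])   # [-0.75, -0.25, 0.25, 0.75]

# Index of the nearest codebook entry for every weight element.
indexes = np.searchsorted(centers, norm_weight)  # [[0, 2], [3, 4]]

# "Quantized" weight: every element is replaced by its codebook entry;
# the indexes are what gets stored alongside the codebook in the compressed model.
quantized_weight = codebook[indexes]             # [[-1.0, 0.0], [0.5, 1.0]]

print(indexes)
print(quantized_weight)
```

This midpoint construction only yields well-defined decision boundaries for a sorted, one-dimensional codebook with more than one distinct value, which is consistent with `test_codebook_is_correct_array` above, where unsorted, duplicated, multi-dimensional, and single-element codebooks are expected to raise `nncf.ValidationError`.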