
Commit 90f115c

Add cubecl re-export, root Tensor, doc updates and Noam scheduler fix (tracel-ai#3742)
1 parent 9358a07 commit 90f115c

File tree

7 files changed (+88, -104 lines)

Cargo.lock

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default.

burn-book/src/quantization.md

Lines changed: 51 additions & 51 deletions

@@ -30,7 +30,7 @@ Quantization support in Burn is currently in active development.
 
 It supports the following modes on some backends:
 
-- Static per-tensor quantization to signed 8-bit integer (`i8`)
+- Per-tensor and per-block (linear) quantization to 8-bit, 4-bit and 2-bit representations
 
 No integer operations are currently supported, which means tensors are dequantized to perform the
 operations in floating point precision.
@@ -45,48 +45,22 @@ tensors and can collect their statistics, such as the min and max value when usi
 
 ```rust , ignore
 # use burn::module::Quantizer;
-# use burn::tensor::quantization::{Calibration, QuantizationScheme, QuantizationType};
+# use burn::tensor::quantization::{Calibration, QuantLevel, QuantParam, QuantScheme, QuantValue};
 #
 // Quantization config
+let scheme = QuantScheme::default()
+    .with_level(QuantLevel::Block(32))
+    .with_value(QuantValue::Q4F)
+    .with_param(QuantParam::F16);
 let mut quantizer = Quantizer {
     calibration: Calibration::MinMax,
-    scheme: QuantizationScheme::PerTensor(QuantizationMode::Symmetric, QuantizationType::QInt8),
+    scheme,
 };
 
 // Quantize the weights
 let model = model.quantize_weights(&mut quantizer);
 ```
 
-> Given that all operations are currently performed in floating point precision, it might be wise to
-> dequantize the module parameters before inference. This allows us to save disk space by storing
-> the model in reduced precision while preserving the inference speed.
->
-> This can easily be implemented with a `ModuleMapper`.
->
-> ```rust, ignore
-> # use burn::module::{ModuleMapper, ParamId};
-> # use burn::tensor::{backend::Backend, Tensor};
-> #
-> /// Module mapper used to dequantize the model params being loaded.
-> pub struct Dequantize {}
->
-> impl<B: Backend> ModuleMapper<B> for Dequantize {
->     fn map_float<const D: usize>(
->         &mut self,
->         _id: ParamId,
->         tensor: Tensor<B, D>,
->     ) -> Tensor<B, D> {
->         tensor.dequantize()
->     }
-> }
->
-> // Load saved quantized model in floating point precision
-> model = model
->     .load_file(file_path, recorder, &device)
->     .expect("Should be able to load the quantized model weights")
->     .map(&mut Dequantize {});
-> ```
-
 ### Calibration
 
 Calibration is the step during quantization where the range of all floating-point tensors is
@@ -101,29 +75,55 @@ To compute the quantization parameters, Burn supports the following `Calibration
 
 ### Quantization Scheme
 
-A quantization scheme defines the quantized type, quantization granularity and range mapping
-technique.
+A quantization scheme defines how an input is quantized, including the representation of quantized
+values, storage format, granularity, and how the values are scaled.
 
-Burn currently supports the following `QuantizationType` variants.
+```rust
+let scheme = QuantScheme::default()
+    .with_mode(QuantMode::Symmetric) // Quantization mode
+    .with_level(QuantLevel::Block(32)) // Granularity (per-tensor or per-block)
+    .with_value(QuantValue::Q8S) // Data type of quantized values, independent of how they're stored
+    .with_store(QuantStore::Native) // Storage format for quantized values
+    .with_param(QuantParam::F16); // Precision for quantization parameters
+```
 
-| Type    | Description                        |
-| :------ | :--------------------------------- |
-| `QInt8` | 8-bit signed integer quantization. |
+#### Quantization Mode
 
-Quantization parameters are defined based on the range of values to represent and can typically be
-calculated for the layer's entire weight tensor with per-tensor quantization or separately for each
-channel with per-channel quantization (commonly used with CNNs).
+| Mode        | Description                                   |
+| :---------- | :-------------------------------------------- |
+| `Symmetric` | Values are scaled symmetrically around zero.  |
 
-Burn currently supports the following `QuantizationScheme` variants.
+#### Quantization Level
 
-| Variant                 | Description                                                                                                                                                               |
-| :---------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `PerTensor(mode, type)` | Applies a single set of quantization parameters to the entire tensor. The `mode` defines how values are transformed, and `type` represents the target quantization type. |
+| Level               | Description                                                                   |
+| :------------------ | :---------------------------------------------------------------------------- |
+| `Tensor`            | A single quantization parameter set for the entire tensor.                    |
+| `Block(block_size)` | Tensor divided into 1D linear blocks, each with its own quantization params.  |
 
-#### Quantization Mode
+#### Quantization Value
+
+| Value | Bits | Description                   |
+| :---- | :--: | :---------------------------- |
+| `Q8F` |  8   | 8-bit full-range quantization |
+| `Q4F` |  4   | 4-bit full-range quantization |
+| `Q2F` |  2   | 2-bit full-range quantization |
+| `Q8S` |  8   | 8-bit symmetric quantization  |
+| `Q4S` |  4   | 4-bit symmetric quantization  |
+| `Q2S` |  2   | 2-bit symmetric quantization  |
+
+#### Quantization Store
+
+| Store    | Description                                              |
+| :------- | :------------------------------------------------------- |
+| `Native` | Each quantized value stored directly in memory.          |
+| `U32`    | Multiple quantized values packed into a 32-bit integer.  |
+
+Native storage is not supported for sub-byte quantization values.
 
-| Mode        | Description                                                         |
-| ----------- | ------------------------------------------------------------------- |
-| `Symmetric` | Maps values using a scale factor for a range centered around zero.  |
+#### Quantization Parameters Precision
 
----
+| Param  | Description                    |
+| :----- | :----------------------------- |
+| `F32`  | Full floating-point precision. |
+| `F16`  | Half-precision floating point. |
+| `BF16` | Brain float 16-bit precision.  |
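
Taken together, the pieces introduced by this diff compose into a single quantization call. Below is a minimal sketch assembled from the book snippets above; the `quantize_model` helper and its generic bounds are illustrative, not Burn API, and `model` is assumed to be any module with float parameters as in the surrounding chapter.

```rust
use burn::module::{Module, Quantizer};
use burn::tensor::backend::Backend;
use burn::tensor::quantization::{Calibration, QuantLevel, QuantParam, QuantScheme, QuantValue};

// Illustrative helper that wires the diff's snippets together.
fn quantize_model<B: Backend, M: Module<B>>(model: M) -> M {
    // 4-bit full-range values, 1D blocks of 32, f16 quantization parameters.
    let scheme = QuantScheme::default()
        .with_level(QuantLevel::Block(32))
        .with_value(QuantValue::Q4F)
        .with_param(QuantParam::F16);
    let mut quantizer = Quantizer {
        calibration: Calibration::MinMax,
        scheme,
    };
    model.quantize_weights(&mut quantizer)
}
```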

crates/burn-core/src/lib.rs

Lines changed: 2 additions & 0 deletions

@@ -39,6 +39,8 @@ pub mod record;
 
 /// Module for the tensor.
 pub mod tensor;
+// Tensor at root: `burn::Tensor`
+pub use tensor::Tensor;
 
 /// Module for visual operations
 #[cfg(feature = "vision")]
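
The effect of the new re-export is a shorter import path: `burn::Tensor` now resolves to the same type as `burn::tensor::Tensor`. A small usage sketch, assuming the `ndarray` backend feature for concreteness (the tensor values are arbitrary):

```rust
use burn::Tensor; // previously: use burn::tensor::Tensor;
use burn::backend::NdArray;

fn main() {
    let device = Default::default();
    // Same `Tensor` type as before, reached from the crate root.
    let x = Tensor::<NdArray, 1>::from_floats([1.0, 2.0, 3.0], &device);
    println!("{x}");
}
```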

crates/burn-core/src/lr_scheduler/noam.rs

Lines changed: 5 additions & 5 deletions

@@ -8,8 +8,8 @@ use crate::{LearningRate, config::Config};
 /// Configuration to create a [noam](NoamLrScheduler) learning rate scheduler.
 #[derive(Config, Debug)]
 pub struct NoamLrSchedulerConfig {
-    /// The initial learning rate.
-    init_lr: LearningRate,
+    /// The overall scale factor for the learning rate decay.
+    factor: f64,
     /// The number of steps before the exponential decay stats.
     #[config(default = 4000)]
     warmup_steps: usize,
@@ -23,7 +23,7 @@ pub struct NoamLrSchedulerConfig {
 pub struct NoamLrScheduler {
     warmup_steps: f64,
     embedding_size: f64,
-    init_lr: LearningRate,
+    factor: f64,
     step: f64,
 }
 
@@ -49,7 +49,7 @@ impl NoamLrSchedulerConfig {
         Ok(NoamLrScheduler {
             warmup_steps: self.warmup_steps as f64,
             embedding_size: self.model_size as f64,
-            init_lr: self.init_lr,
+            factor: self.factor,
             step: 0.0,
         })
     }
@@ -64,7 +64,7 @@ impl LrScheduler for NoamLrScheduler {
         let arg1 = self.step.powf(-0.5);
         let arg2 = self.step * self.warmup_steps.powf(-1.5);
 
-        self.init_lr * self.embedding_size.powf(-0.5) * f64::min(arg1, arg2)
+        self.factor * self.embedding_size.powf(-0.5) * f64::min(arg1, arg2)
    }
 
    fn to_record<B: Backend>(&self) -> Self::Record<B> {
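
The rename from `init_lr` to `factor` matches the Noam schedule from the Transformer paper, `lr(step) = factor * d_model^(-0.5) * min(step^(-0.5), step * warmup^(-1.5))`: the parameter scales the whole curve rather than setting an initial rate. A standalone sketch of the arithmetic in `step` above; the `noam_lr` function and the sample constants are illustrative, not Burn API.

```rust
// Mirrors the computation in `NoamLrScheduler::step`.
fn noam_lr(factor: f64, model_size: f64, warmup_steps: f64, step: f64) -> f64 {
    let arg1 = step.powf(-0.5);
    let arg2 = step * warmup_steps.powf(-1.5);
    factor * model_size.powf(-0.5) * f64::min(arg1, arg2)
}

fn main() {
    // With factor = 1.0, d_model = 512, warmup = 4000, the rate rises during
    // warmup and peaks where the two `min` arguments meet, at step == warmup.
    for step in [1.0_f64, 1000.0, 4000.0, 10_000.0] {
        println!("step {step}: lr = {:.6}", noam_lr(1.0, 512.0, 4000.0, step));
    }
}
```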

crates/burn-tensor/src/tensor/element/base.rs

Lines changed: 17 additions & 46 deletions

@@ -14,7 +14,6 @@ pub trait Element:
     ToElement
     + ElementRandom
     + ElementConversion
-    + ElementPrecision
     + ElementComparison
     + ElementLimits
     + bytemuck::CheckedBitPattern
@@ -78,42 +77,20 @@ pub trait ElementLimits {
     const MAX: Self;
 }
 
-/// Element precision trait for tensor.
-#[derive(Clone, PartialEq, Eq, Copy, Debug)]
-pub enum Precision {
-    /// Double precision, e.g. f64.
-    Double,
-
-    /// Full precision, e.g. f32.
-    Full,
-
-    /// Half precision, e.g. f16.
-    Half,
-
-    /// Other precision.
-    Other,
-}
-
-/// Element precision trait for tensor.
-pub trait ElementPrecision {
-    /// Returns the precision of the element.
-    fn precision() -> Precision;
-}
-
 /// Macro to implement the element trait for a type.
 #[macro_export]
 macro_rules! make_element {
     (
-        ty $type:ident $precision:expr,
+        ty $type:ident,
         convert $convert:expr,
         random $random:expr,
         cmp $cmp:expr,
         dtype $dtype:expr
     ) => {
-        make_element!(ty $type $precision, convert $convert, random $random, cmp $cmp, dtype $dtype, min $type::MIN, max $type::MAX);
+        make_element!(ty $type, convert $convert, random $random, cmp $cmp, dtype $dtype, min $type::MIN, max $type::MAX);
    };
    (
-        ty $type:ident $precision:expr,
+        ty $type:ident,
         convert $convert:expr,
         random $random:expr,
         cmp $cmp:expr,
@@ -140,12 +117,6 @@ macro_rules! make_element {
            }
        }
 
-        impl ElementPrecision for $type {
-            fn precision() -> Precision {
-                $precision
-            }
-        }
-
        impl ElementRandom for $type {
            fn random<R: RngCore>(distribution: Distribution, rng: &mut R) -> Self {
                #[allow(clippy::redundant_closure_call)]
@@ -170,87 +141,87 @@ macro_rules! make_element {
 }
 
 make_element!(
-    ty f64 Precision::Double,
+    ty f64,
     convert ToElement::to_f64,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &f64, b: &f64| a.total_cmp(b),
     dtype DType::F64
 );
 
 make_element!(
-    ty f32 Precision::Full,
+    ty f32,
     convert ToElement::to_f32,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &f32, b: &f32| a.total_cmp(b),
     dtype DType::F32
 );
 
 make_element!(
-    ty i64 Precision::Double,
+    ty i64,
     convert ToElement::to_i64,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &i64, b: &i64| Ord::cmp(a, b),
     dtype DType::I64
 );
 
 make_element!(
-    ty u64 Precision::Double,
+    ty u64,
     convert ToElement::to_u64,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &u64, b: &u64| Ord::cmp(a, b),
     dtype DType::U64
 );
 
 make_element!(
-    ty i32 Precision::Full,
+    ty i32,
     convert ToElement::to_i32,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &i32, b: &i32| Ord::cmp(a, b),
     dtype DType::I32
 );
 
 make_element!(
-    ty u32 Precision::Full,
+    ty u32,
     convert ToElement::to_u32,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &u32, b: &u32| Ord::cmp(a, b),
     dtype DType::U32
 );
 
 make_element!(
-    ty i16 Precision::Half,
+    ty i16,
     convert ToElement::to_i16,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &i16, b: &i16| Ord::cmp(a, b),
     dtype DType::I16
 );
 
 make_element!(
-    ty u16 Precision::Half,
+    ty u16,
     convert ToElement::to_u16,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &u16, b: &u16| Ord::cmp(a, b),
     dtype DType::U16
 );
 
 make_element!(
-    ty i8 Precision::Other,
+    ty i8,
     convert ToElement::to_i8,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &i8, b: &i8| Ord::cmp(a, b),
     dtype DType::I8
 );
 
 make_element!(
-    ty u8 Precision::Other,
+    ty u8,
     convert ToElement::to_u8,
     random |distribution: Distribution, rng: &mut R| distribution.sampler(rng).sample(),
     cmp |a: &u8, b: &u8| Ord::cmp(a, b),
     dtype DType::U8
 );
 
 make_element!(
-    ty f16 Precision::Half,
+    ty f16,
     convert ToElement::to_f16,
     random |distribution: Distribution, rng: &mut R| {
         let sample: f32 = distribution.sampler(rng).sample();
@@ -260,7 +231,7 @@ make_element!(
     dtype DType::F16
 );
 make_element!(
-    ty bf16 Precision::Half,
+    ty bf16,
     convert ToElement::to_bf16,
     random |distribution: Distribution, rng: &mut R| {
         let sample: f32 = distribution.sampler(rng).sample();
@@ -272,7 +243,7 @@ make_element!(
 
 #[cfg(feature = "cubecl")]
 make_element!(
-    ty flex32 Precision::Half,
+    ty flex32,
     convert |elem: &dyn ToElement| flex32::from_f32(elem.to_f32()),
     random |distribution: Distribution, rng: &mut R| {
         let sample: f32 = distribution.sampler(rng).sample();
@@ -285,7 +256,7 @@ make_element!(
 );
 
 make_element!(
-    ty bool Precision::Other,
+    ty bool,
     convert ToElement::to_bool,
     random |distribution: Distribution, rng: &mut R| {
         let sample: u8 = distribution.sampler(rng).sample();
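
With `Precision` and `ElementPrecision` removed, the width of an element is still recoverable from the `DType` that `make_element!` registers. A hedged migration sketch follows: it assumes `Element::dtype()` exposes that `DType` (the diff implies the association but does not show the accessor), assumes the `half` crate for the `f16` type, and `is_half_precision` is an illustrative helper, not Burn API.

```rust
use burn_tensor::{DType, Element};

// Illustrative replacement for the removed `ElementPrecision::precision()`:
// classify an element type by matching on its registered `DType`.
fn is_half_precision<E: Element>() -> bool {
    matches!(E::dtype(), DType::F16 | DType::BF16)
}

fn main() {
    println!("f32 half-precision? {}", is_half_precision::<f32>());
    println!("f16 half-precision? {}", is_half_precision::<half::f16>());
}
```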
