Add quantize_block

awf · awf · commit 30fc8de154d2 · 2024-05-10T14:03:51.000+01:00
diff --git a/src/gfloat/__init__.py b/src/gfloat/__init__.py
@@ -1,6 +1,12 @@
 # Copyright (c) 2024 Graphcore Ltd. All rights reserved.
 
-from .block import BlockFormatInfo, decode_block, encode_block
+from .block import (
+    BlockFormatInfo,
+    decode_block,
+    encode_block,
+    quantize_block,
+    compute_scale_amax,
+)
 from .decode import decode_float
 from .printing import float_pow2str, float_tilde_unless_roundtrip_str
 from .round import encode_float, round_float
diff --git a/src/gfloat/block.py b/src/gfloat/block.py
@@ -4,10 +4,11 @@
 # https://en.wikipedia.org/wiki/Block_floating_point
 
 from dataclasses import dataclass
-from typing import Iterable
+from typing import Iterable, Callable
+import numpy as np
 
 from .decode import decode_float
-from .round import encode_float, round_float, RoundMode
+from .round import RoundMode, encode_float, round_float
 from .types import FormatInfo
 
 
@@ -84,10 +85,12 @@ def encode_block(
     round: RoundMode = RoundMode.TiesToEven,
 ) -> Iterable[int]:
     """
-    Encode a :paramref:`block` of bytes into block Format descibed by :paramref:`fi`
+    Encode float :paramref:`vals` into block Format described by :paramref:`fi`
 
-    The :paramref:`scale` is explicitly passed, and is converted to `1/(1/scale)`
-    before rounding to the target format.
+    The :paramref:`scale` is explicitly passed, and the :paramref:`vals` are
+    assumed to already be multiplied by `1/scale`.
+    That is, this is pure encoding, scaling is computed and applied elsewhere
+    (see e.g. :func:`quantize_block`).
 
     It is checked for overflow in the target format,
     and will raise an exception if it does.
@@ -105,11 +108,6 @@ def encode_block(
       ValueError: The scale overflows the target scale encoding format.
     """
 
-    # TODO: this should really not do any multiplication -
-    # the scale is to be recorded not applied.
-    recip_scale = 1 / scale
-    scale = 1 / recip_scale
-
     if scale > fi.stype.max:
         raise ValueError(f"Scaled {scale} too large for {fi.stype}")
 
@@ -121,4 +119,55 @@ def enc(ty: FormatInfo, x: float) -> int:
     yield enc(fi.stype, scale)
 
     for val in vals:
-        yield enc(fi.etype, recip_scale * val)
+        yield enc(fi.etype, val)
+
+
+def compute_scale_amax(etype_emax: float, vals: np.array) -> float:
+    """
+    Compute a power-of-two scale from the largest magnitude in :paramref:`vals`,
+    chosen so that the largest exponent in `vals / scale` is `etype_emax`
+
+    Args:
+      etype_emax (float): Maximum exponent to appear in `vals / scale`
+      vals (numpy.array): Input block
+
+    Returns:
+      `2.0 ** (floor(log2(amax)) - etype_emax)`, where `amax = max(abs(vals))`.
+
+    Note:
+      If all vals are zero, 1.0 is returned.
+    """
+    amax = np.max(np.abs(vals))
+    if amax == 0.0:
+        # Array is all zeros - 1.0 is a good scale value
+        return 1.0
+    q_log2scale = np.floor(np.log2(amax)) - etype_emax
+    return 2.0**q_log2scale
+
+
+def quantize_block(
+    fi: BlockFormatInfo,
+    vals: np.ndarray,
+    compute_scale: Callable[[float, np.ndarray], float] = compute_scale_amax,
+    round: RoundMode = RoundMode.TiesToEven,
+) -> np.ndarray:
+    """
+    Quantize :paramref:`vals` by encoding to, then decoding from, block format :paramref:`fi`
+
+    Args:
+      fi (BlockFormatInfo): Describes the target block format
+      vals (numpy.ndarray): Input block of floats
+      compute_scale ((float, np.ndarray) -> float):
+          Called as `compute_scale(fi.etype.emax, vals)` to choose the scale
+      round (RoundMode): Rounding mode to use, defaults to `TiesToEven`
+
+    Returns:
+      An array of floats representing the quantized values.
+
+    Raises:
+      ValueError: The scale overflows the target scale encoding format.
+    """
+    # Use the caller's scale policy; hard-coding compute_scale_amax ignored it.
+    q_scale = compute_scale(fi.etype.emax, vals)
+    enc = encode_block(fi, q_scale, vals / q_scale, round)
+    return np.fromiter(decode_block(fi, enc), float)
diff --git a/test/test_block.py b/test/test_block.py
@@ -13,7 +13,7 @@ def test_blocks(fi: BlockFormatInfo) -> None:
     vals = np.linspace(-37.0, 42.0, 32)
 
     scale = 8.0
-    block = list(encode_block(fi, scale, vals))
+    block = list(encode_block(fi, scale, vals / scale))
     decoded_vals = list(decode_block(fi, block))
 
     atol = 2 * scale * fi.etype.eps
diff --git a/test/test_microxcaling.py b/test/test_microxcaling.py
@@ -10,15 +10,7 @@
 from mx.formats import ElemFormat
 
 
-from gfloat import (
-    BlockFormatInfo,
-    encode_block,
-    decode_block,
-    encode_float,
-    decode_float,
-    round_float,
-    RoundMode,
-)
+from gfloat import BlockFormatInfo, RoundMode, quantize_block, compute_scale_amax
 from gfloat.formats import *
 
 
@@ -41,11 +33,10 @@ def test_mx(
     mx_etype: ElemFormat,
     gf_etype: FormatInfo,
 ) -> None:
-    ## Input tensor
+    # Input tensor
     A = np.arange(32) / 2 - 5
 
-    ## Compute MX quantization
-    # Declare block format
+    # MX: Declare block format
     mx_specs = dict(
         block_size=32,
         scale_bits=8,
@@ -54,21 +45,14 @@ def test_mx(
         custom_cuda=False,
     )
 
-    # Compute scale, encode, decode
+    # MX: Quantize
     mx_dq = quantize_mx_op(torch.tensor(A), mx_specs, mx_etype, axes=0, round=mx_round)
 
-    ## Compute GFloat quantization
-    # Declare block format
+    # GFloat: Declare block format
     fi = BlockFormatInfo("test", gf_etype, 32, format_info_ocp_e8m0)
 
-    # Compute scale - this is not considered GFloat's job, but could easily be added
-    amax = np.max(np.abs(A))
-    q_log2scale = np.floor(np.log2(amax)) - fi.etype.emax
-    q_scale = 2**q_log2scale
-
-    # Apply scale to encode and decode
-    enc = encode_block(fi, q_scale, A, gf_round)
-    gf_dq = list(decode_block(fi, enc))
+    # GFloat: Quantize
+    gf_dq = quantize_block(fi, A, compute_scale_amax, gf_round)
 
-    ## Compare
+    # Compare
     np.testing.assert_allclose(gf_dq, mx_dq)