From ef648d9b82267c59567b92cc897aaf1977f12537 Mon Sep 17 00:00:00 2001
From: Sanggyu Lee <takepencil@naver.com>
Date: Fri, 21 Nov 2025 10:20:38 +0900
Subject: [PATCH 1/2] [tools/oquantize] Introduce oquantize

oquantize (the "o" stands for circle) quantizes the weights of Circle models.
It currently supports only the ggml q4_0 format.

ONE-DCO-1.0-Signed-off-by: Sanggyu Lee <sg5.lee@samsung.com>
---
 tools/oquantize/MANIFEST.in |   1 +
 tools/oquantize/README.md   |  46 +++++++++++
 tools/oquantize/__init__.py |   0
 tools/oquantize/__main__.py |   4 +
 tools/oquantize/main.py     | 154 ++++++++++++++++++++++++++++++++++++
 tools/oquantize/setup.py    | 129 ++++++++++++++++++++++++++++++
 6 files changed, 334 insertions(+)
 create mode 100644 tools/oquantize/MANIFEST.in
 create mode 100644 tools/oquantize/README.md
 create mode 100644 tools/oquantize/__init__.py
 create mode 100644 tools/oquantize/__main__.py
 create mode 100644 tools/oquantize/main.py
 create mode 100644 tools/oquantize/setup.py

diff --git a/tools/oquantize/MANIFEST.in b/tools/oquantize/MANIFEST.in
new file mode 100644
index 00000000000..1d39f83b4f3
--- /dev/null
+++ b/tools/oquantize/MANIFEST.in
@@ -0,0 +1,2 @@
+# Paths are relative to this directory (the one holding setup.py).
+include lib/*.so
diff --git a/tools/oquantize/README.md b/tools/oquantize/README.md
new file mode 100644
index 00000000000..419675cca52
--- /dev/null
+++ b/tools/oquantize/README.md
@@ -0,0 +1,46 @@
+# Circle Model Quantization with GGML
+
+This tool quantizes Circle models using the GGML library.
+
+## Prerequisites
+- `gcc` installed
+- `flatc` (FlatBuffers compiler) must be available
+- Set `FLATC_PATH` if `flatc` is not in your PATH or standard build locations
+
+## Building the Tool
+
+The tool is structured as a Python package `oquantize` located in `tools/oquantize`.
+It relies on a native shared library that must be compiled first; the same build step also generates `circle.py` from the Circle schema.
+
+```bash
+cd tools/oquantize
+python3 setup.py
+```
+
+This compiles `lib/libggml_quant.so` from the GGML source files and generates `circle.py`, both under `tools/oquantize`.
+
+## Running the Tool
+
+To quantize a Circle model, run the `oquantize` package from the `tools` directory:
+
+```bash
+cd tools
+# Usage: python -m oquantize <quant_type> <input_circle> <output_circle>
+python3 -m oquantize q4_0 prefill.circle prefill.q4.circle
+python3 -m oquantize q4_0 decode.circle decode.q4.circle
+```
+
+### File Size Comparison
+
+| File | Original Size | Quantized Size | Reduction |
+|------|---------------|----------------|-----------|
+| prefill.circle | 18M | 2.7M | ~85% |
+| decode.circle | 18M | 2.7M | ~85% |
+
+(Note: the reduction comes from FP32 -> Q4_0 quantization of the weights: each block of 32 FP32 values (128 bytes) is stored in 18 bytes.)
+
+## Implementation Details
+- **Package Structure**: `tools/oquantize/`
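+- **Native library**: `libggml_quant.so`, compiled with `gcc` from `ggml-quants.c`, `ggml-aarch64.c`, and `ggml.c`, and loaded through `ctypes`
+- **Quantization**: FP32 weights of `GATHER` (input 0) and `FULLY_CONNECTED` (input 1) are converted to `GGML_Q4_0` by one `quantize_row_q4_0` call over the whole tensor; a tensor whose element count is not a multiple of 32 is left unquantized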
+- **Schema**: `circle.py` generated from `runtime/libs/circle-schema/circle_schema.fbs` using `flatc --python --gen-object-api --gen-onefile`
diff --git a/tools/oquantize/__init__.py b/tools/oquantize/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tools/oquantize/__main__.py b/tools/oquantize/__main__.py
new file mode 100644
index 00000000000..40e2b013f61
--- /dev/null
+++ b/tools/oquantize/__main__.py
@@ -0,0 +1,4 @@
+from .main import main
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/oquantize/main.py b/tools/oquantize/main.py
new file mode 100644
index 00000000000..2f936049b7f
--- /dev/null
+++ b/tools/oquantize/main.py
@@ -0,0 +1,154 @@
+import ctypes
+import os
+import sys
+import numpy as np
+import flatbuffers
+
+# Add tools/o2o to sys.path to import circle
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../o2o')))
+try:
+    import circle
+except ImportError:
+    print("Error: Could not import 'circle'. Make sure tools/o2o is in PYTHONPATH or the script is run from the correct location.")
+    sys.exit(1)
+
+def load_ggml_library():
+    lib_path = os.path.join(os.path.dirname(__file__), 'lib', 'libggml_quant.so')
+    if not os.path.exists(lib_path):
+        print(f"Error: {lib_path} not found. Please build the package first.")
+        sys.exit(1)
+
+    lib = ctypes.CDLL(lib_path)
+
+    # void quantize_row_q4_0(const float * x, void * y, int64_t k);
+    lib.quantize_row_q4_0.argtypes = [
+        ctypes.POINTER(ctypes.c_float),
+        ctypes.c_void_p,
+        ctypes.c_int64
+    ]
+    lib.quantize_row_q4_0.restype = None
+
+    return lib
+
+def quantize_tensor(lib, tensor_data):
+    # tensor_data is a numpy array of float32
+    k = tensor_data.size
+
+    if k % 32 != 0:
+        print(f"Warning: Tensor size {k} is not a multiple of 32. Skipping quantization.")
+        return None
+
+    # QK4_0 = 32
+    # block_q4_0 size = sizeof(ggml_half) + QK4_0 / 2 = 2 + 16 = 18 bytes
+    block_size = 18
+    num_blocks = k // 32
+    output_size = num_blocks * block_size
+
+    output_buffer = (ctypes.c_byte * output_size)()
+
+    # Create a pointer to the input data
+    input_ptr = tensor_data.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
+
+    # Call the C function
+    # quantize_row_q4_0 processes the whole row (k elements)
+    lib.quantize_row_q4_0(input_ptr, output_buffer, ctypes.c_int64(k))
+
+    return bytearray(output_buffer)
+
+def main():
+    if len(sys.argv) != 4:
+        print("Usage: python -m oquantize <quant_type> <input_circle> <output_circle>")
+        print("Supported quant_type: q4_0")
+        sys.exit(1)
+
+    quant_type = sys.argv[1]
+    input_path = sys.argv[2]
+    output_path = sys.argv[3]
+
+    if quant_type != "q4_0":
+        print(f"Error: Unsupported quantization type '{quant_type}'. Only 'q4_0' is supported.")
+        sys.exit(1)
+
+    if not os.path.exists(input_path):
+        print(f"Error: Input file {input_path} does not exist.")
+        sys.exit(1)
+
+    lib = load_ggml_library()
+
+    print(f"Loading {input_path}...")
+    with open(input_path, 'rb') as f:
+        buf = f.read()
+
+    model = circle.Model.GetRootAs(buf, 0)
+    model_t = circle.ModelT.InitFromObj(model)
+
+    quantized_count = 0
+
+    for subgraph in model_t.subgraphs:
+        for op in subgraph.operators:
+            target_tensor_idx = -1
+
+            if op.opcodeIndex < len(model_t.operatorCodes):
+                op_code = model_t.operatorCodes[op.opcodeIndex]
+                builtin_code = op_code.builtinCode
+
+                if builtin_code == circle.BuiltinOperator.GATHER:
+                    # GATHER: input 0 is params (weights)
+                    if len(op.inputs) > 0:
+                        target_tensor_idx = op.inputs[0]
+                elif builtin_code == circle.BuiltinOperator.FULLY_CONNECTED:
+                    # FULLY_CONNECTED: input 1 is weights
+                    if len(op.inputs) > 1:
+                        target_tensor_idx = op.inputs[1]
+
+            if target_tensor_idx != -1:
+                tensor = subgraph.tensors[target_tensor_idx]
+
+                if tensor.type == circle.TensorType.FLOAT32:
+                    buffer_idx = tensor.buffer
+                    if buffer_idx < len(model_t.buffers):
+                        buffer_obj = model_t.buffers[buffer_idx]
+
+                        # Check if buffer has data
+                        if buffer_obj.data is not None:
+                            # Convert to numpy array
+                            # buffer_obj.data is a list of ints (bytes) or numpy array
+                            # circle.py generated code usually behaves like this:
+                            # if InitFromObj used numpy, it might be numpy.
+                            # Let's assume it's a list of uint8 or similar.
+
+                            data_bytes = bytes(buffer_obj.data)
+                            tensor_data = np.frombuffer(data_bytes, dtype=np.float32)
+
+                            print(f"Quantizing tensor {target_tensor_idx} (size={tensor_data.size})...")
+
+                            quantized_data = quantize_tensor(lib, tensor_data)
+
+                            if quantized_data is not None:
+                                # Update buffer
+                                buffer_obj.data = list(quantized_data) # FlatBuffers python expects list of ints for ubyte vector?
+                                # Or numpy array? circle.py:
+                                # if np is not None and type(self.data) is np.ndarray: builder.CreateNumpyVector(self.data)
+                                # So we can set it to numpy array of uint8
+                                buffer_obj.data = np.frombuffer(quantized_data, dtype=np.uint8)
+
+                                # Update tensor type
+                                tensor.type = circle.TensorType.GGML_Q4_0
+                                quantized_count += 1
+
+    if quantized_count > 0:
+        print(f"Quantized {quantized_count} tensors.")
+        print(f"Saving to {output_path}...")
+
+        builder = flatbuffers.Builder(1024)
+        model_offset = model_t.Pack(builder)
+        builder.Finish(model_offset, file_identifier=b'CIR0')
+
+        with open(output_path, 'wb') as f:
+            f.write(builder.Output())
+        print("Done.")
+    else:
+        print("No tensors quantized.")
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/oquantize/setup.py b/tools/oquantize/setup.py
new file mode 100644
index 00000000000..f8e4691482e
--- /dev/null
+++ b/tools/oquantize/setup.py
@@ -0,0 +1,129 @@
+import os
+import subprocess
+import sys
+from setuptools import setup, find_packages
+from setuptools.command.build_py import build_py
+
+import shutil
+
+def find_flatc():
+    # 1. Check FLATC_PATH environment variable
+    flatc_env = os.environ.get('FLATC_PATH')
+    if flatc_env and os.path.isfile(flatc_env) and os.access(flatc_env, os.X_OK):
+        return flatc_env
+
+    # 2. Check system PATH
+    flatc_path = shutil.which('flatc')
+    if flatc_path:
+        return flatc_path
+
+    # 3. Check common build locations
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    possible_paths = [
+        os.path.join(script_dir, '../../build/release/overlay/bin/flatc'),
+        os.path.join(script_dir, '../../build/debug/overlay/bin/flatc'),
+    ]
+
+    for path in possible_paths:
+        if os.path.isfile(path) and os.access(path, os.X_OK):
+            return path
+
+    return None
+
+def generate_circle_py():
+    flatc_path = find_flatc()
+    if not flatc_path:
+        print("Error: flatc not found. Please set FLATC_PATH environment variable or ensure flatc is in your PATH or build directory.")
+        sys.exit(1)
+
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    schema_path = os.path.abspath(os.path.join(script_dir, '../../runtime/libs/circle-schema/circle_schema.fbs'))
+
+    if not os.path.exists(schema_path):
+        print(f"Error: Schema file not found at {schema_path}")
+        sys.exit(1)
+
+    output_dir = script_dir
+
+    print(f"Generating circle.py using {flatc_path} from {schema_path}...")
+    cmd = [
+        flatc_path,
+        '--python',
+        '--gen-object-api',
+        '--gen-onefile',
+        '-o', output_dir,
+        schema_path
+    ]
+
+    try:
+        subprocess.run(cmd, check=True)
+        generated_file = os.path.join(output_dir, 'circle_schema_generated.py')
+        target_file = os.path.join(output_dir, 'circle.py')
+        os.rename(generated_file, target_file)
+        print("Successfully generated circle.py")
+    except:
+        print(f"Failed to generate circle.py")
+        sys.exit(1)
+
+def compile_ggml_lib():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    ggml_src_dir = os.path.abspath(os.path.join(script_dir, '../../runtime/3rdparty/ggml/src'))
+    lib_dir = os.path.join(script_dir, 'lib')
+    lib_name = 'libggml_quant.so'
+    lib_path = os.path.join(lib_dir, lib_name)
+
+    if not os.path.exists(lib_dir):
+        os.makedirs(lib_dir)
+
+    print(f"Compiling {lib_name} from {ggml_src_dir}...")
+
+    cmd = [
+        'gcc',
+        '-shared',
+        '-fPIC',
+        '-O3',
+        '-o', lib_path,
+        os.path.join(ggml_src_dir, 'ggml-quants.c'),
+        os.path.join(ggml_src_dir, 'ggml-aarch64.c'),
+        os.path.join(ggml_src_dir, 'ggml.c'),
+        '-I', ggml_src_dir,
+        '-I', os.path.abspath(os.path.join(ggml_src_dir, '../include')),
+        '-lm'
+    ]
+
+    print("Running command:", " ".join(cmd))
+    try:
+        subprocess.check_call(cmd)
+        print(f"Successfully compiled {lib_path}")
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to compile {lib_name}: {e}")
+        sys.exit(1)
+
+class CustomBuildPy(build_py):
+    def run(self):
+        generate_circle_py()
+        compile_ggml_lib()
+        super().run()
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        # Direct execution: python setup.py
+        # Compile in-place
+        generate_circle_py()
+        compile_ggml_lib()
+    else:
+        # Standard setuptools execution
+        setup(
+            name='oquantize',
+            version='0.1.0',
+            packages=['oquantize'],
+            package_dir={'oquantize': '.'},
+            include_package_data=True,
+            cmdclass={
+                'build_py': CustomBuildPy,
+            },
+            install_requires=[
+                'numpy',
+                'flatbuffers',
+            ],
+        )

From 7f2ed2adfe1b1cd69bfe1c79b34c8e53e63dd832 Mon Sep 17 00:00:00 2001
From: Sanggyu Lee <takepencil@naver.com>
Date: Fri, 21 Nov 2025 11:10:59 +0900
Subject: [PATCH 2/2] Update coding style

---
 tools/oquantize/main.py  | 27 +++++++++++++++++++--------
 tools/oquantize/setup.py | 33 ++++++++++++++++-----------------
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/tools/oquantize/main.py b/tools/oquantize/main.py
index 2f936049b7f..5744bb5df0f 100644
--- a/tools/oquantize/main.py
+++ b/tools/oquantize/main.py
@@ -9,9 +9,12 @@
 try:
     import circle
 except ImportError:
-    print("Error: Could not import 'circle'. Make sure tools/o2o is in PYTHONPATH or the script is run from the correct location.")
+    print(
+        "Error: Could not import 'circle'. Make sure tools/o2o is in PYTHONPATH or the script is run from the correct location."
+    )
     sys.exit(1)
 
+
 def load_ggml_library():
     lib_path = os.path.join(os.path.dirname(__file__), 'lib', 'libggml_quant.so')
     if not os.path.exists(lib_path):
@@ -22,14 +25,13 @@ def load_ggml_library():
 
     # void quantize_row_q4_0(const float * x, void * y, int64_t k);
     lib.quantize_row_q4_0.argtypes = [
-        ctypes.POINTER(ctypes.c_float),
-        ctypes.c_void_p,
-        ctypes.c_int64
+        ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int64
     ]
     lib.quantize_row_q4_0.restype = None
 
     return lib
 
+
 def quantize_tensor(lib, tensor_data):
     # tensor_data is a numpy array of float32
     k = tensor_data.size
@@ -55,6 +57,7 @@ def quantize_tensor(lib, tensor_data):
 
     return bytearray(output_buffer)
 
+
 def main():
     if len(sys.argv) != 4:
         print("Usage: python -m oquantize <quant_type> <input_circle> <output_circle>")
@@ -66,7 +69,9 @@ def main():
     output_path = sys.argv[3]
 
     if quant_type != "q4_0":
-        print(f"Error: Unsupported quantization type '{quant_type}'. Only 'q4_0' is supported.")
+        print(
+            f"Error: Unsupported quantization type '{quant_type}'. Only 'q4_0' is supported."
+        )
         sys.exit(1)
 
     if not os.path.exists(input_path):
@@ -120,17 +125,18 @@ def main():
                             data_bytes = bytes(buffer_obj.data)
                             tensor_data = np.frombuffer(data_bytes, dtype=np.float32)
 
-                            print(f"Quantizing tensor {target_tensor_idx} (size={tensor_data.size})...")
+                            print(
+                                f"Quantizing tensor {target_tensor_idx} (size={tensor_data.size})..."
+                            )
 
                             quantized_data = quantize_tensor(lib, tensor_data)
 
                             if quantized_data is not None:
-                                # Update buffer
-                                buffer_obj.data = list(quantized_data) # FlatBuffers python expects list of ints for ubyte vector?
-                                # Or numpy array? circle.py:
-                                # if np is not None and type(self.data) is np.ndarray: builder.CreateNumpyVector(self.data)
-                                # So we can set it to numpy array of uint8
-                                buffer_obj.data = np.frombuffer(quantized_data, dtype=np.uint8)
+                                # Store the result as a uint8 ndarray; the generated
+                                # circle.py packs ndarray data with
+                                # builder.CreateNumpyVector, so no list copy is needed.
+                                buffer_obj.data = np.frombuffer(quantized_data,
+                                                                dtype=np.uint8)
 
                                 # Update tensor type
                                 tensor.type = circle.TensorType.GGML_Q4_0
@@ -150,5 +156,6 @@ def main():
     else:
         print("No tensors quantized.")
 
+
 if __name__ == "__main__":
     main()
diff --git a/tools/oquantize/setup.py b/tools/oquantize/setup.py
index f8e4691482e..0c5a57fb938 100644
--- a/tools/oquantize/setup.py
+++ b/tools/oquantize/setup.py
@@ -6,6 +6,7 @@
 
 import shutil
 
+
 def find_flatc():
     # 1. Check FLATC_PATH environment variable
     flatc_env = os.environ.get('FLATC_PATH')
@@ -30,14 +31,18 @@ def find_flatc():
 
     return None
 
+
 def generate_circle_py():
     flatc_path = find_flatc()
     if not flatc_path:
-        print("Error: flatc not found. Please set FLATC_PATH environment variable or ensure flatc is in your PATH or build directory.")
+        print(
+            "Error: flatc not found. Please set FLATC_PATH environment variable or ensure flatc is in your PATH or build directory."
+        )
         sys.exit(1)
 
     script_dir = os.path.dirname(os.path.abspath(__file__))
-    schema_path = os.path.abspath(os.path.join(script_dir, '../../runtime/libs/circle-schema/circle_schema.fbs'))
+    schema_path = os.path.abspath(
+        os.path.join(script_dir, '../../runtime/libs/circle-schema/circle_schema.fbs'))
 
     if not os.path.exists(schema_path):
         print(f"Error: Schema file not found at {schema_path}")
@@ -47,11 +52,7 @@ def generate_circle_py():
 
     print(f"Generating circle.py using {flatc_path} from {schema_path}...")
     cmd = [
-        flatc_path,
-        '--python',
-        '--gen-object-api',
-        '--gen-onefile',
-        '-o', output_dir,
+        flatc_path, '--python', '--gen-object-api', '--gen-onefile', '-o', output_dir,
         schema_path
     ]
 
@@ -62,12 +63,14 @@ def generate_circle_py():
         os.rename(generated_file, target_file)
         print("Successfully generated circle.py")
-    except:
-        print(f"Failed to generate circle.py")
+    except (subprocess.CalledProcessError, OSError) as e:  # flatc or rename failed
+        print(f"Failed to generate circle.py: {e}")
         sys.exit(1)
 
+
 def compile_ggml_lib():
     script_dir = os.path.dirname(os.path.abspath(__file__))
-    ggml_src_dir = os.path.abspath(os.path.join(script_dir, '../../runtime/3rdparty/ggml/src'))
+    ggml_src_dir = os.path.abspath(
+        os.path.join(script_dir, '../../runtime/3rdparty/ggml/src'))
     lib_dir = os.path.join(script_dir, 'lib')
     lib_name = 'libggml_quant.so'
     lib_path = os.path.join(lib_dir, lib_name)
@@ -78,17 +81,11 @@ def compile_ggml_lib():
     print(f"Compiling {lib_name} from {ggml_src_dir}...")
 
     cmd = [
-        'gcc',
-        '-shared',
-        '-fPIC',
-        '-O3',
-        '-o', lib_path,
+        'gcc', '-shared', '-fPIC', '-O3', '-o', lib_path,
         os.path.join(ggml_src_dir, 'ggml-quants.c'),
         os.path.join(ggml_src_dir, 'ggml-aarch64.c'),
-        os.path.join(ggml_src_dir, 'ggml.c'),
-        '-I', ggml_src_dir,
-        '-I', os.path.abspath(os.path.join(ggml_src_dir, '../include')),
-        '-lm'
+        os.path.join(ggml_src_dir, 'ggml.c'), '-I', ggml_src_dir, '-I',
+        os.path.abspath(os.path.join(ggml_src_dir, '../include')), '-lm'
     ]
 
     print("Running command:", " ".join(cmd))
@@ -99,12 +96,14 @@ def compile_ggml_lib():
         print(f"Failed to compile {lib_name}: {e}")
         sys.exit(1)
 
+
 class CustomBuildPy(build_py):
     def run(self):
         generate_circle_py()
         compile_ggml_lib()
         super().run()
 
+
 if __name__ == "__main__":
     if len(sys.argv) == 1:
         # Direct execution: python setup.py