@@ -19,88 +19,88 @@ namespace alp {
1919 * reference and bitpacked. Every exception, where the conversion/reconversion changes the value of
2020 * the float, is stored separately and has to be patched into the decompressed vector afterwards.
2121 *
22- * ═══════════════════════════════════════════════════════════════════════════════════════
22+ * ==========================================================================================
2323 * ALP COMPRESSION/DECOMPRESSION PIPELINE
24- * ═══════════════════════════════════════════════════════════════════════════════════════
24+ * ==========================================================================================
2525 *
2626 * COMPRESSION FLOW:
27- * ─────────────────
27+ * -----------------
2828 *
2929 * Input: float/double array
30- * │
31- * ▼
32- * ┌─────────────────────────────────────────────────────────────────┐
33- * │ 1. SAMPLING & PRESET GENERATION │
34- * │ • Sample vectors from dataset │
35- * │ • Try all exponent/factor combinations (e, f) │
36- * │ • Select best k combinations for preset │
37- * └──────────────────────────────────────┬──────────────────────────┘
38- * │ preset.combinations
39- * ▼
40- * ┌─────────────────────────────────────────────────────────────────┐
41- * │ 2. PER-VECTOR COMPRESSION │
42- * │ a) Find best (e,f) from preset for this vector │
43- * │ b) Encode: encoded[i] = int64(value[i] * 10^e * 10^-f) │
44- * │ c) Verify: if decode(encoded[i]) ≠ value[i] → exception │
45- * │ d) Replace exceptions with placeholder value │
46- * └──────────────────────────────────────┬──────────────────────────┘
47- * │ encoded integers + exceptions
48- * ▼
49- * ┌─────────────────────────────────────────────────────────────────┐
50- * │ 3. FRAME OF REFERENCE (FOR) │
51- * │ • Find min value in encoded integers │
52- * │ • Subtract min from all values: delta[i] = encoded[i] - min │
53- * └──────────────────────────────────────┬──────────────────────────┘
54- * │ delta values (smaller range)
55- * ▼
56- * ┌─────────────────────────────────────────────────────────────────┐
57- * │ 4. BIT PACKING │
58- * │ • Calculate bitWidth = log2(max_delta) │
59- * │ • Pack each value into bitWidth bits │
60- * │ • Result: tightly packed binary data │
61- * └──────────────────────────────────────┬──────────────────────────┘
62- * │ packed bytes
63- * ▼
64- * ┌─────────────────────────────────────────────────────────────────┐
65- * │ 5. SERIALIZATION (see AlpEncodedVector diagram below) │
66- * │ [VectorInfo][PackedData][ExceptionPos][ExceptionValues] │
67- * └─────────────────────────────────────────────────────────────────┘
30+ * |
31+ * v
32+ * +------------------------------------------------------------------+
33+ * | 1. SAMPLING & PRESET GENERATION |
34+ * | * Sample vectors from dataset |
35+ * | * Try all exponent/factor combinations (e, f) |
36+ * | * Select best k combinations for preset |
37+ * +------------------------------------+-----------------------------+
38+ * | preset.combinations
39+ * v
40+ * +------------------------------------------------------------------+
41+ * | 2. PER-VECTOR COMPRESSION |
42+ * | a) Find best (e,f) from preset for this vector |
43+ * | b) Encode: encoded[i] = int64(value[i] * 10^e * 10^-f) |
44+ * | c) Verify: if decode(encoded[i]) != value[i] -> exception |
45+ * | d) Replace exceptions with placeholder value |
46+ * +------------------------------------+-----------------------------+
47+ * | encoded integers + exceptions
48+ * v
49+ * +------------------------------------------------------------------+
50+ * | 3. FRAME OF REFERENCE (FOR) |
51+ * | * Find min value in encoded integers |
52+ * | * Subtract min from all values: delta[i] = encoded[i] - min |
53+ * +------------------------------------+-----------------------------+
54+ * | delta values (smaller range)
55+ * v
56+ * +------------------------------------------------------------------+
57+ * | 4. BIT PACKING |
58+ * | * Calculate bitWidth = ceil(log2(max_delta + 1)) |
59+ * | * Pack each value into bitWidth bits |
60+ * | * Result: tightly packed binary data |
61+ * +------------------------------------+-----------------------------+
62+ * | packed bytes
63+ * v
64+ * +------------------------------------------------------------------+
65+ * | 5. SERIALIZATION (see AlpEncodedVector diagram below) |
66+ * | [VectorInfo][PackedData][ExceptionPos][ExceptionValues] |
67+ * +------------------------------------------------------------------+
6868 *
6969 *
7070 * DECOMPRESSION FLOW:
71- * ───────────────────
71+ * -------------------
7272 *
73- * Serialized bytes → AlpEncodedVector::load()
74- * │
75- * ▼
76- * ┌─────────────────────────────────────────────────────────────────┐
77- * │ 1. BIT UNPACKING │
78- * │ • Extract bitWidth from metadata │
79- * │ • Unpack each value from bitWidth bits → delta values │
80- * └──────────────────────────────────────┬──────────────────────────┘
81- * │ delta values
82- * ▼
83- * ┌─────────────────────────────────────────────────────────────────┐
84- * │ 2. REVERSE FRAME OF REFERENCE (unFOR) │
85- * │ • Add back min: encoded[i] = delta[i] + frameOfReference │
86- * └──────────────────────────────────────┬──────────────────────────┘
87- * │ encoded integers
88- * ▼
89- * ┌─────────────────────────────────────────────────────────────────┐
90- * │ 3. DECODE │
91- * │ • Apply inverse formula: value[i] = encoded[i] * 10^-e * 10^f│
92- * └──────────────────────────────────────┬──────────────────────────┘
93- * │ decoded floats (with placeholders)
94- * ▼
95- * ┌─────────────────────────────────────────────────────────────────┐
96- * │ 4. PATCH EXCEPTIONS │
97- * │ • Replace values at exceptionPositions[] with exceptions[] │
98- * └──────────────────────────────────────┬──────────────────────────┘
99- * │
100- * ▼
73+ * Serialized bytes -> AlpEncodedVector::load()
74+ * |
75+ * v
76+ * +------------------------------------------------------------------+
77+ * | 1. BIT UNPACKING |
78+ * | * Extract bitWidth from metadata |
79+ * | * Unpack each value from bitWidth bits -> delta values |
80+ * +------------------------------------+-----------------------------+
81+ * | delta values
82+ * v
83+ * +------------------------------------------------------------------+
84+ * | 2. REVERSE FRAME OF REFERENCE (unFOR) |
85+ * | * Add back min: encoded[i] = delta[i] + frameOfReference |
86+ * +------------------------------------+-----------------------------+
87+ * | encoded integers
88+ * v
89+ * +------------------------------------------------------------------+
90+ * | 3. DECODE |
91+ * | * Apply inverse formula: value[i] = encoded[i] * 10^-e * 10^f |
92+ * +------------------------------------+-----------------------------+
93+ * | decoded floats (with placeholders)
94+ * v
95+ * +------------------------------------------------------------------+
96+ * | 4. PATCH EXCEPTIONS |
97+ * | * Replace values at exceptionPositions[] with exceptions[] |
98+ * +------------------------------------+-----------------------------+
99+ * |
100+ * v
101101 * Output: Original float/double array (lossless!)
102102 *
103- * ═══════════════════════════════════════════════════════════════════════════════════════
103+ * ==========================================================================================
104104 */
105105
106106/* *
@@ -130,21 +130,21 @@ struct AlpExponentAndFactor {
130130 *
131131 * Serialization format (stored as raw binary struct):
132132 *
133- * ┌──────────────────────────────────────────┐
134- * │ AlpEncodedVectorInfo (23+ bytes) │
135- * ├──────────────────────────────────────────┤
136- * │ Offset │ Field │ Size │
137- * ├─────────┼─────────────────────┼──────────┤
138- * │ 0 │ exponent (uint8_t) │ 1 byte │
139- * │ 1 │ factor (uint8_t) │ 1 byte │
140- * │ 2 │ [padding] │ 6 bytes │
141- * │ 8 │ frameOfReference │ 8 bytes │
142- * │ 16 │ bitWidth (uint8_t) │ 1 byte │
143- * │ 17 │ [padding] │ 7 bytes │
144- * │ 24 │ bitPackedSize │ 8 bytes │
145- * │ 32 │ numElements │ 2 bytes │
146- * │ 34 │ numExceptions │ 2 bytes │
147- * └──────────────────────────────────────────┘
133+ * +------------------------------------------+
134+ * | AlpEncodedVectorInfo (23 bytes + padding) |
135+ * +------------------------------------------+
136+ * | Offset | Field | Size |
137+ * +---------+---------------------+----------+
138+ * | 0 | exponent (uint8_t) | 1 byte |
139+ * | 1 | factor (uint8_t) | 1 byte |
140+ * | 2 | [padding] | 6 bytes |
141+ * | 8 | frameOfReference | 8 bytes |
142+ * | 16 | bitWidth (uint8_t) | 1 byte |
143+ * | 17 | [padding] | 7 bytes |
144+ * | 24 | bitPackedSize | 8 bytes |
145+ * | 32 | numElements | 2 bytes |
146+ * | 34 | numExceptions | 2 bytes |
147+ * +------------------------------------------+
148148 */
149149struct AlpEncodedVectorInfo {
150150 // / Exponent and factor used for compression.
@@ -174,23 +174,23 @@ struct AlpEncodedVectorInfo {
174174/* *
175175 * Complete serialization format for an ALP compressed vector:
176176 *
177- * ┌────────────────────────────────────────────────────────────┐
178- * │ AlpEncodedVector<T> Serialized Layout │
179- * ├────────────────────────────────────────────────────────────┤
180- * │ Section │ Size (bytes) │ Description │
181- * ├───────────────────────┼──────────────────────┼──────────────┤
182- * │ 1. VectorInfo │ sizeof(VectorInfo) │ Metadata │
183- * │ (see above) │ (~36 with padding) │ │
184- * ├───────────────────────┼──────────────────────┼──────────────┤
185- * │ 2. Packed Values │ bitPackedSize │ Bitpacked │
186- * │ (compressed data) │ (variable) │ integers │
187- * ├───────────────────────┼──────────────────────┼──────────────┤
188- * │ 3. Exception Pos │ numExceptions * 2 │ uint16_t[] │
189- * │ (indices) │ (variable) │ positions │
190- * ├───────────────────────┼──────────────────────┼──────────────┤
191- * │ 4. Exception Values │ numExceptions * │ T[] (float/ │
192- * │ (original floats) │ sizeof(T) │ double) │
193- * └────────────────────────────────────────────────────────────┘
177+ * +------------------------------------------------------------+
178+ * | AlpEncodedVector<T> Serialized Layout |
179+ * +------------------------------------------------------------+
180+ * | Section | Size (bytes) | Description |
181+ * +-----------------------+----------------------+-------------+
182+ * | 1. VectorInfo | sizeof(VectorInfo) | Metadata |
183+ * | (see above) | (~36 with padding) | |
184+ * +-----------------------+----------------------+-------------+
185+ * | 2. Packed Values | bitPackedSize | Bitpacked |
186+ * | (compressed data) | (variable) | integers |
187+ * +-----------------------+----------------------+-------------+
188+ * | 3. Exception Pos | numExceptions * 2 | uint16_t[] |
189+ * | (indices) | (variable) | positions |
190+ * +-----------------------+----------------------+-------------+
191+ * | 4. Exception Values | numExceptions * | T[] (float/|
192+ * | (original floats) | sizeof(T) | double) |
193+ * +------------------------------------------------------------+
194194 *
195195 * Example for 1024 floats with 5 exceptions and bitWidth=8:
196196 * - VectorInfo: 36 bytes
0 commit comments