|
29 | 29 |
|
30 | 30 | #include <stdint.h>
|
31 | 31 |
|
| 32 | +// The format of the compressed data is: |
| 33 | +// - the size of the uncompressed string in UTF-8 bytes, encoded as a |
| 34 | +// (compress_max_length_bits)-bit number. compress_max_length_bits is |
| 35 | +// computed during dictionary generation time, and happens to be 8 |
| 36 | +// for all current platforms. However, it'll probably end up being |
| 37 | +// 9 in some translations sometime in the future. This length excludes |
| 38 | +// the trailing NUL, though notably decompress_length includes it. |
| 39 | +// |
| 40 | +// - followed by the huffman encoding of the individual UTF-16 code |
| 41 | +// points that make up the string. The trailing "\0" is not |
| 42 | +// represented by a huffman code, but is implied by the length. |
| 43 | +// (building the huffman encoding on UTF-16 code points gave better |
| 44 | +// compression than building it on UTF-8 bytes) |
| 45 | +// |
| 46 | +// The "data" / "tail" construct is so that the struct's last member is a |
| 47 | +// "flexible array". However, the _only_ member is not permitted to be |
| 48 | +// a flexible member, so we have to declare the first byte as a separate |
| 49 | +// member of the structure. |
| 50 | +// |
| 51 | +// For translations where length needs 8 bits, this saves about 1.5 |
| 52 | +// bytes per string on average compared to a structure of {uint16_t, |
| 53 | +// flexible array}, but is also future-proofed against strings with |
| 54 | +// UTF-8 length above 255, with a savings of about 1.375 bytes per |
| 55 | +// string. |
32 | 56 | typedef struct {
|
// First byte of the compressed stream. It is declared as its own member
// because a flexible array may not be the only member of a struct.
33 | 57 | uint8_t data;
|
// Remaining bytes of the compressed stream (flexible array member).
34 | 58 | const uint8_t tail[];
|
35 | 59 | } compressed_string_t;
|
36 | 60 |
|
| 61 | +// Return the compressed, translated version of a source string |
| 62 | +// Usually, due to LTO, this is optimized into a load of a constant |
| 63 | +// pointer. |
37 | 64 | const compressed_string_t* translate(const char* c);
|
// Takes a compressed string and returns nothing.
// NOTE(review): presumably decompresses it and writes the text to the serial
// output -- confirm against the implementation.
38 | 65 | void serial_write_compressed(const compressed_string_t* compressed);
|
// Takes a compressed string and a caller-supplied char buffer.
// NOTE(review): presumably expands the string into `decompressed` and returns
// that buffer; the buffer likely needs decompress_length bytes (which, per the
// format description, includes the trailing NUL) -- confirm against the
// implementation.
39 | 66 | char* decompress(const compressed_string_t* compressed, char* decompressed);
|
|
0 commit comments