Add CIRCUITPY_MESSAGE_COMPRESSION_LEVEL

jepler · jepler · commit 7ab5252cdd3f · 2023-10-20T19:18:18.000+01:00
to trade compile speed & flash size

Initially enable the faster mode on rp2040 and espressif, where there's
usually plenty of flash available (these advanced techniques save hundreds
to thousands of bytes, which is important on a lot of old samd21 boards
but is a drop in the bucket on a 4MB flash chip).
diff --git a/mpy-cross/Makefile b/mpy-cross/Makefile
@@ -63,6 +63,8 @@ endif
 OBJ = $(PY_CORE_O)
 OBJ += $(addprefix $(BUILD)/, $(SRC_C:.c=.o))
 
+# CIRCUITPY
 $(BUILD)/supervisor/shared/translate/translate.o: $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/compressed_translations.generated.h
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
 
 include $(TOP)/py/mkrules.mk
diff --git a/ports/espressif/mpconfigport.mk b/ports/espressif/mpconfigport.mk
@@ -148,3 +148,6 @@ endif
 # only if something else is turned off, such as HID.
 USB_NUM_ENDPOINT_PAIRS = 7
 USB_NUM_IN_ENDPOINTS = 5
+
+# Usually lots of flash space available
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 1
diff --git a/ports/raspberrypi/mpconfigport.mk b/ports/raspberrypi/mpconfigport.mk
@@ -52,3 +52,6 @@ USB_NUM_ENDPOINT_PAIRS = 8
 
 INTERNAL_FLASH_FILESYSTEM = 1
 CIRCUITPY_SETTABLE_PROCESSOR_FREQUENCY = 1
+
+# Usually lots of flash space available
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 1
diff --git a/ports/unix/mpconfigport.mk b/ports/unix/mpconfigport.mk
@@ -50,5 +50,6 @@ MICROPY_VFS_LFS2 = 0
 
 # CIRCUITPY
 CIRCUITPY_ULAB = 1
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
 MICROPY_EMIT_NATIVE = 0
 CFLAGS += -DCIRCUITPY=1
diff --git a/ports/unix/variants/coverage/mpconfigvariant.mk b/ports/unix/variants/coverage/mpconfigvariant.mk
@@ -92,3 +92,4 @@ CFLAGS += \
 
 SRC_C += coverage.c
 SRC_CXX += coveragecpp.cpp
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1
diff --git a/py/circuitpy_mpconfig.mk b/py/circuitpy_mpconfig.mk
@@ -52,6 +52,10 @@ CFLAGS += -DCIRCUITPY=$(CIRCUITPY)
 CIRCUITPY_FULL_BUILD ?= 1
 CFLAGS += -DCIRCUITPY_FULL_BUILD=$(CIRCUITPY_FULL_BUILD)
 
+# By default, aggressively reduce the size of in-flash messages, at the cost of
+# increased build time
+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL ?= 9
+
 # Reduce the size of in-flash properties. Requires support in the .ld linker
 # file, so not enabled by default.
 CIRCUITPY_OPTIMIZE_PROPERTY_FLASH_SIZE ?= 0
diff --git a/py/maketranslationdata.py b/py/maketranslationdata.py
@@ -174,7 +174,7 @@ class EncodingTable:
     qstrs_inv: object
 
 
-def compute_huffman_coding(qstrs, translation_name, translations, f):
+def compute_huffman_coding(qstrs, translation_name, translations, f, compression_level):
     # possible future improvement: some languages are better when consider len(k) > 2. try both?
     qstrs = dict((k, v) for k, v in qstrs.items() if len(k) > 3)
     qstr_strs = list(qstrs.keys())
@@ -209,6 +209,8 @@ def remove_offset(c):
             if 0x80 <= ord_c < 0xFF:
                 end_unused = min(ord_c, end_unused)
     max_words = end_unused - 0x80
+    if compression_level < 5:
+        max_words = 0
 
     bits_per_codepoint = 16 if max_ord > 255 else 8
     values_type = "uint16_t" if max_ord > 255 else "uint8_t"
@@ -298,8 +300,12 @@ def est_net_savings(s, occ):
         word = scores[0][0]
         words.append(word)
 
+    splitters = words[:]
+    if compression_level > 3:
+        splitters.extend(qstr_strs)
+
     words.sort(key=len)
-    extractor = TextSplitter(words + qstr_strs)
+    extractor = TextSplitter(splitters)
     counter = collections.Counter()
     used_qstr = 0
     for t in texts:
@@ -356,8 +362,8 @@ def est_net_savings(s, occ):
         len(translation.encode("utf-8")) for (original, translation) in translations
     )
 
-    maxlen = len(words[-1])
-    minlen = len(words[0])
+    maxlen = len(words[-1]) if words else 0
+    minlen = len(words[0]) if words else 0
     wlencount = [len([None for w in words if len(w) == l]) for l in range(minlen, maxlen + 1)]
 
     translation_qstr_bits = used_qstr.bit_length()
@@ -596,6 +602,12 @@ def output_translation_data(encoding_table, i18ns, out):
     parser.add_argument(
         "--translation", default=None, type=str, help="translations for i18n() items"
     )
+    parser.add_argument(
+        "--compression_level",
+        type=int,
+        default=9,
+        help="degree of compression (>=5: construct dictionary; >=4: use qstrs)",
+    )
     parser.add_argument(
         "--compression_filename",
         type=argparse.FileType("w", encoding="UTF-8"),
@@ -619,6 +631,6 @@ def output_translation_data(encoding_table, i18ns, out):
     i18ns = sorted(i18ns)
     translations = translate(args.translation, i18ns)
     encoding_table = compute_huffman_coding(
-        qstrs, args.translation, translations, args.compression_filename
+        qstrs, args.translation, translations, args.compression_filename, args.compression_level
     )
     output_translation_data(encoding_table, translations, args.translation_filename)
diff --git a/py/py.mk b/py/py.mk
@@ -269,7 +269,7 @@ $(PY_BUILD)/translations-$(TRANSLATION).c: $(HEADER_BUILD)/compressed_translatio
 $(HEADER_BUILD)/compressed_translations.generated.h: $(PY_SRC)/maketranslationdata.py $(HEADER_BUILD)/$(TRANSLATION).mo $(HEADER_BUILD)/qstrdefs.generated.h
 	$(STEPECHO) "GEN $@"
 	$(Q)mkdir -p $(PY_BUILD)
-	$(Q)$(PYTHON) $(PY_SRC)/maketranslationdata.py --compression_filename $(HEADER_BUILD)/compressed_translations.generated.h --translation $(HEADER_BUILD)/$(TRANSLATION).mo --translation_filename $(PY_BUILD)/translations-$(TRANSLATION).c --qstrdefs_filename  $(HEADER_BUILD)/qstrdefs.generated.h $(HEADER_BUILD)/qstrdefs.preprocessed.h
+	$(Q)$(PYTHON) $(PY_SRC)/maketranslationdata.py --compression_filename $(HEADER_BUILD)/compressed_translations.generated.h --translation $(HEADER_BUILD)/$(TRANSLATION).mo --translation_filename $(PY_BUILD)/translations-$(TRANSLATION).c --qstrdefs_filename  $(HEADER_BUILD)/qstrdefs.generated.h --compression_level $(CIRCUITPY_MESSAGE_COMPRESSION_LEVEL) $(HEADER_BUILD)/qstrdefs.preprocessed.h
 
 PY_CORE_O += $(PY_BUILD)/translations-$(TRANSLATION).o
 

Original file line number	Diff line number	Diff line change
`@@ -92,3 +92,4 @@ CFLAGS += \`
`92`	`92`
`93`	`93`	`SRC_C += coverage.c`
`94`	`94`	`SRC_CXX += coveragecpp.cpp`
	`95`	`+CIRCUITPY_MESSAGE_COMPRESSION_LEVEL = 1`