Turn on unicode for FATFS

tannewt · tannewt · commit 11f1c42bb5b1 · 2021-08-17T17:41:59.000-07:00
This also tweaks the repr for unicode strings to escape only a few Unicode code points (control codes and some space/separator characters) instead of every non-ASCII character. This makes emoji show in os.listdir() output, for example. Also enables exFAT support on full builds. Fixes #5146
diff --git a/lib/oofatfs/ff.c b/lib/oofatfs/ff.c
@@ -1175,6 +1175,7 @@ static DWORD get_fat (      /* 0xFFFFFFFF:Disk error, 1:Internal error, 2..0x7FF
                     break;
                 }
             }
+            MP_FALLTHROUGH
             /* go to default */
 #endif
         default:
@@ -5518,6 +5519,7 @@ FRESULT f_mkfs (
                 }
                 st = 1;         /* Do not compress short run */
                 /* go to next case */
+                MP_FALLTHROUGH
             case 1:
                 ch = si++;      /* Fill the short run */
                 if (--j == 0) st = 0;
diff --git a/lib/oofatfs/ffconf.h b/lib/oofatfs/ffconf.h
@@ -163,8 +163,11 @@
 /  memory for the working buffer, memory management functions, ff_memalloc() and
 /  ff_memfree() in ffsystem.c, need to be added to the project. */
 
-
+#ifdef MICROPY_FATFS_LFN_UNICODE
+#define FF_LFN_UNICODE  (MICROPY_FATFS_LFN_UNICODE)
+#else
 #define FF_LFN_UNICODE  0
+#endif
 /* This option switches the character encoding on the API when LFN is enabled.
 /
 /   0: ANSI/OEM in current CP (TCHAR = char)
diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h
@@ -132,12 +132,15 @@
 //
 // 1 = SFN/ANSI 437=LFN/U.S.(OEM)
 #define MICROPY_FATFS_ENABLE_LFN      (1)
+// Code page is ignored because unicode is enabled.
 // Don't use parens on the value below because it gets combined with a prefix in
 // the preprocessor.
 #define MICROPY_FATFS_LFN_CODE_PAGE   437
 #define MICROPY_FATFS_USE_LABEL       (1)
 #define MICROPY_FATFS_RPATH           (2)
 #define MICROPY_FATFS_MULTI_PARTITION (1)
+#define MICROPY_FATFS_EXFAT           (CIRCUITPY_FULL_BUILD)
+#define MICROPY_FATFS_LFN_UNICODE      2  // UTF-8
 
 // Only enable this if you really need it. It allocates a byte cache of this size.
 // #define MICROPY_FATFS_MAX_SS           (4096)
diff --git a/py/objstrunicode.c b/py/objstrunicode.c
@@ -41,6 +41,13 @@ STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_bu
 /******************************************************************************/
 /* str                                                                        */
 
+
+// These settings approximate CPython's printability. They are not
+// exhaustive and may print "unprintable" characters. All ASCII control codes
+// are escaped along with variable-width spaces and line/paragraph separators.
+// Unlike CPython, we do not escape private use codes or reserved characters.
+// We assume that the unicode is well formed.
+// CPython policy is documented here: https://github.com/python/cpython/blob/bb3e0c240bc60fe08d332ff5955d54197f79751c/Objects/unicodectype.c#L147-L159
 STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) {
     // this escapes characters, but it will be very slow to print (calling print many times)
     bool has_single_quote = false;
@@ -61,25 +68,26 @@ STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint
     while (s < top) {
         unichar ch;
         ch = utf8_get_char(s);
+        const byte *start = s;
         s = utf8_next_char(s);
         if (ch == quote_char) {
             mp_printf(print, "\\%c", quote_char);
         } else if (ch == '\\') {
             mp_print_str(print, "\\\\");
-        } else if (32 <= ch && ch <= 126) {
-            mp_printf(print, "%c", ch);
         } else if (ch == '\n') {
             mp_print_str(print, "\\n");
         } else if (ch == '\r') {
             mp_print_str(print, "\\r");
         } else if (ch == '\t') {
             mp_print_str(print, "\\t");
-        } else if (ch < 0x100) {
+        } else if (ch <= 0x1f || (0x7f <= ch && ch <= 0xa0) || ch == 0xad) {
             mp_printf(print, "\\x%02x", ch);
-        } else if (ch < 0x10000) {
+        } else if ((0x2000 <= ch && ch <= 0x200f) || ch == 0x2028 || ch == 0x2029) {
             mp_printf(print, "\\u%04x", ch);
         } else {
-            mp_printf(print, "\\U%08x", ch);
+            // Print the full character out.
+            int width = s - start;
+            mp_print_strn(print, (const char *)start, width, 0, ' ', width);
         }
     }
     mp_printf(print, "%c", quote_char);
diff --git a/tests/unicode/unicode_repr.py b/tests/unicode/unicode_repr.py
@@ -0,0 +1,8 @@
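+# ¥ (U+00A5) is a code point that fits in 1 byte (2 bytes in UTF-8)
+# Œ (U+0152) is a code point that fits in 2 bytes (2 bytes in UTF-8)
+# 😅 (U+1F605) is a code point that needs more than 2 bytes (4 bytes in UTF-8)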
+
+a = "hello¥Œ😅.txt\n\r\t'\"\\"
+
+print(a)
+print(repr(a))

Original file line number	Diff line number	Diff line change
`@@ -1175,6 +1175,7 @@ static DWORD get_fat ( /* 0xFFFFFFFF:Disk error, 1:Internal error, 2..0x7FF`
`1175`	`1175`	`break;`
`1176`	`1176`	`}`
`1177`	`1177`	`}`
	`1178`	`+ MP_FALLTHROUGH`
`1178`	`1179`	`/* go to default */`
`1179`	`1180`	`#endif`
`1180`	`1181`	`default:`
`@@ -5518,6 +5519,7 @@ FRESULT f_mkfs (`
`5518`	`5519`	`}`
`5519`	`5520`	`st = 1; /* Do not compress short run */`
`5520`	`5521`	`/* go to next case */`
	`5522`	`+ MP_FALLTHROUGH`
`5521`	`5523`	`case 1:`
`5522`	`5524`	`ch = si++; /* Fill the short run */`
`5523`	`5525`	`if (--j == 0) st = 0;`