Skip to content

Commit 11f1c42

Browse files
committed
Turn on unicode for FATFS
This also tweaks the repr for unicode strings to escape only a few Unicode code points, so that emoji show up in os.listdir() output, for example. Also, enable exFAT support on full builds. Fixes #5146
1 parent 5b0009c commit 11f1c42

File tree

5 files changed

+30
-6
lines changed

5 files changed

+30
-6
lines changed

lib/oofatfs/ff.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,6 +1175,7 @@ static DWORD get_fat ( /* 0xFFFFFFFF:Disk error, 1:Internal error, 2..0x7FF
11751175
break;
11761176
}
11771177
}
1178+
MP_FALLTHROUGH
11781179
/* go to default */
11791180
#endif
11801181
default:
@@ -5518,6 +5519,7 @@ FRESULT f_mkfs (
55185519
}
55195520
st = 1; /* Do not compress short run */
55205521
/* go to next case */
5522+
MP_FALLTHROUGH
55215523
case 1:
55225524
ch = si++; /* Fill the short run */
55235525
if (--j == 0) st = 0;

lib/oofatfs/ffconf.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,11 @@
163163
/ memory for the working buffer, memory management functions, ff_memalloc() and
164164
/ ff_memfree() in ffsystem.c, need to be added to the project. */
165165

166-
166+
#ifdef MICROPY_FATFS_LFN_UNICODE
167+
#define FF_LFN_UNICODE (MICROPY_FATFS_LFN_UNICODE)
168+
#else
167169
#define FF_LFN_UNICODE 0
170+
#endif
168171
/* This option switches the character encoding on the API when LFN is enabled.
169172
/
170173
/ 0: ANSI/OEM in current CP (TCHAR = char)

py/circuitpy_mpconfig.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,15 @@
132132
//
133133
// 1 = SFN/ANSI 437=LFN/U.S.(OEM)
134134
#define MICROPY_FATFS_ENABLE_LFN (1)
135+
// Code page is ignored because unicode is enabled.
135136
// Don't use parens on the value below because it gets combined with a prefix in
136137
// the preprocessor.
137138
#define MICROPY_FATFS_LFN_CODE_PAGE 437
138139
#define MICROPY_FATFS_USE_LABEL (1)
139140
#define MICROPY_FATFS_RPATH (2)
140141
#define MICROPY_FATFS_MULTI_PARTITION (1)
142+
#define MICROPY_FATFS_EXFAT (CIRCUITPY_FULL_BUILD)
143+
#define MICROPY_FATFS_LFN_UNICODE 2 // UTF-8
141144

142145
// Only enable this if you really need it. It allocates a byte cache of this size.
143146
// #define MICROPY_FATFS_MAX_SS (4096)

py/objstrunicode.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_bu
4141
/******************************************************************************/
4242
/* str */
4343

44+
45+
// These settings approximate CPython's printability. They are not
46+
// exhaustive and may print "unprintable" characters. All ASCII control codes
47+
// are escaped along with variable-width spaces and line/paragraph separators.
48+
// Unlike CPython, we do not escape private use codes or reserved characters.
49+
// We assume that the unicode is well formed.
50+
// CPython policy is documented here: https://github.com/python/cpython/blob/bb3e0c240bc60fe08d332ff5955d54197f79751c/Objects/unicodectype.c#L147-L159
4451
STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint str_len) {
4552
// this escapes characters, but it will be very slow to print (calling print many times)
4653
bool has_single_quote = false;
@@ -61,25 +68,26 @@ STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint
6168
while (s < top) {
6269
unichar ch;
6370
ch = utf8_get_char(s);
71+
const byte *start = s;
6472
s = utf8_next_char(s);
6573
if (ch == quote_char) {
6674
mp_printf(print, "\\%c", quote_char);
6775
} else if (ch == '\\') {
6876
mp_print_str(print, "\\\\");
69-
} else if (32 <= ch && ch <= 126) {
70-
mp_printf(print, "%c", ch);
7177
} else if (ch == '\n') {
7278
mp_print_str(print, "\\n");
7379
} else if (ch == '\r') {
7480
mp_print_str(print, "\\r");
7581
} else if (ch == '\t') {
7682
mp_print_str(print, "\\t");
77-
} else if (ch < 0x100) {
83+
} else if (ch <= 0x1f || (0x7f <= ch && ch <= 0xa0) || ch == 0xad) {
7884
mp_printf(print, "\\x%02x", ch);
79-
} else if (ch < 0x10000) {
85+
} else if ((0x2000 <= ch && ch <= 0x200f) || ch == 0x2028 || ch == 0x2029) {
8086
mp_printf(print, "\\u%04x", ch);
8187
} else {
82-
mp_printf(print, "\\U%08x", ch);
88+
// Print the full character out.
89+
int width = s - start;
90+
mp_print_strn(print, (const char *)start, width, 0, ' ', width);
8391
}
8492
}
8593
mp_printf(print, "%c", quote_char);

tests/unicode/unicode_repr.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# ¥ is 2 bytes wide in UTF-8 (its code point, U+00A5, fits in 1 byte)
2+
# Œ is 2 bytes wide
3+
# 😅 is 4 bytes wide
4+
5+
a = "hello¥Œ😅.txt\n\r\t'\"\\"
6+
7+
print(a)
8+
print(repr(a))

0 commit comments

Comments
 (0)