Skip to content

Commit 2a36a61

Browse files
committed
Add UTF-8 length procedure.
Add UTF-8 offset procedure. Add UTF-8 substr procedure. Add wchar_t to UTF-8 conversion procedure. Add multibyte to wchar_t conversion procedure (POSIX). Add UTF-8 tests. Fix password reader to validate UTF-8 encoding when reading from a pipe. Fix password reader to read UTF-8 encoded strings from the *nix console.
1 parent bb34d0c commit 2a36a61

File tree

3 files changed

+488
-67
lines changed

3 files changed

+488
-67
lines changed

stew/conio.nim

Lines changed: 100 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
## those terms.
88

99
## This module implements cross-platform console procedures.
10-
import io2
10+
import io2, utf8
1111
export io2
1212

1313
when defined(windows):
@@ -62,6 +62,7 @@ when defined(windows):
6262
ENABLE_PROCESSED_INPUT = 0x0001'u32
6363
ENABLE_ECHO_INPUT = 0x0004'u32
6464
FILE_TYPE_CHAR = 0x0002'u32
65+
ERROR_NO_UNICODE_TRANSLATION = 1113'u32
6566

6667
proc isConsoleRedirected*(hConsole: uint): bool =
6768
## Returns ``true`` if console handle was redirected.
@@ -73,7 +74,7 @@ when defined(windows):
7374
else:
7475
true
7576

76-
proc readConsoleInput(maxBytes: int): IoResult[string] =
77+
proc readConsoleInput(maxChars: int): IoResult[string] =
7778
let hConsoleInput =
7879
block:
7980
let res = getStdHandle(STD_INPUT_HANDLE)
@@ -94,8 +95,9 @@ when defined(windows):
9495
if setConsoleCP(CP_UTF8) == 0'i32:
9596
return err(ioLastError())
9697

97-
# Allocating buffer with size equal to `maxBytes` + len(CRLF)
98-
var buffer = newString(maxBytes + 2)
98+
# Allocating buffer with size equal to `(maxChars + len(CRLF)) * 4`,
99+
# where 4 is maximum expected size of one character (UTF8 encoding).
100+
var buffer = newString((maxChars + 2) * 4)
99101
let bytesToRead = uint32(len(buffer))
100102
var bytesRead: uint32
101103
let rres = readFile(hConsoleInput, cast[pointer](addr buffer[0]),
@@ -109,7 +111,7 @@ when defined(windows):
109111
return err(ioLastError())
110112

111113
# Truncate additional bytes from buffer.
112-
buffer.setLen(int(min(bytesRead, uint32(maxBytes))))
114+
buffer.setLen(int(bytesRead))
113115

114116
# Trim CR/CRLF from buffer.
115117
if len(buffer) > 0:
@@ -123,7 +125,13 @@ when defined(windows):
123125
buffer.setLen(len(buffer) - 1)
124126
elif buffer[^1] == char(0x0D):
125127
buffer.setLen(len(buffer) - 1)
126-
ok(buffer)
128+
129+
# Check if buffer is valid UTF-8 encoded string.
130+
if utf8Validate(buffer):
131+
# Cut result buffer to `maxChars` characters.
132+
ok(utf8Substr(buffer, 0, maxChars - 1).get())
133+
else:
134+
err(IoErrorCode(ERROR_NO_UNICODE_TRANSLATION))
127135
else:
128136
let prevMode =
129137
block:
@@ -147,8 +155,8 @@ when defined(windows):
147155
discard setConsoleCP(prevInputCP)
148156
return err(errCode)
149157

150-
# Allocating buffer with size equal to `maxBytes` + len(CRLF)
151-
var buffer = newSeq[Utf16Char](maxBytes + 2)
158+
# Allocating buffer with size equal to `maxChars` + len(CRLF).
159+
var buffer = newSeq[Utf16Char](maxChars + 2)
152160
let charsToRead = uint32(len(buffer))
153161
var charsRead: uint32
154162
let rres = readConsole(hConsoleInput, cast[pointer](addr buffer[0]),
@@ -170,7 +178,8 @@ when defined(windows):
170178
return err(ioLastError())
171179

172180
# Truncate additional bytes from buffer.
173-
buffer.setLen(int(min(charsRead, uint32(maxBytes))))
181+
buffer.setLen(int(min(charsRead, uint32(maxChars))))
182+
174183
# Truncate CRLF in result wide string.
175184
if len(buffer) > 0:
176185
if int16(buffer[^1]) == int16(0x0A):
@@ -184,7 +193,7 @@ when defined(windows):
184193
elif int16(buffer[^1]) == int16(0x0D):
185194
buffer.setLen(len(buffer) - 1)
186195

187-
# Convert Windows UTF-16 encoded string to UTF-8 encoded string.
196+
# Convert Windows UCS-2 encoded string to UTF-8 encoded string.
188197
if len(buffer) > 0:
189198
var pwd = ""
190199
let bytesNeeded = wideCharToMultiByte(CP_UTF8, 0'u32, addr buffer[0],
@@ -277,61 +286,101 @@ elif defined(posix):
277286
else:
278287
ok()
279288

280-
proc readConsoleInput(maxBytes: int): IoResult[string] =
281-
# Allocating buffer with size equal to `maxBytes` + len(LF)
282-
var buffer = newString(maxBytes + 1)
283-
let bytesRead =
284-
if isConsoleRedirected(STDIN_FILENO):
285-
let res = posix.read(STDIN_FILENO, cast[pointer](addr buffer[0]),
286-
len(buffer))
287-
if res < 0:
288-
return err(ioLastError())
289-
res
289+
proc readConsoleInput(maxChars: int): IoResult[string] =
290+
# Allocating buffer with size equal to `(maxChars + len(LF)) * 4`, where
291+
# 4 is maximum expected size of one character (UTF8 encoding).
292+
var buffer = newString((maxChars + 1) * 4)
293+
294+
if isConsoleRedirected(STDIN_FILENO):
295+
let bytesRead =
296+
block:
297+
let res = posix.read(STDIN_FILENO, cast[pointer](addr buffer[0]),
298+
len(buffer))
299+
if res < 0:
300+
return err(ioLastError())
301+
res
302+
303+
# Truncate additional bytes from buffer.
304+
buffer.setLen(bytesRead)
305+
306+
# Trim LF in result string
307+
if len(buffer) > 0:
308+
if buffer[^1] == char(0x0A):
309+
buffer.setLen(len(buffer) - 1)
310+
311+
# Check if buffer is valid UTF-8 encoded string.
312+
if utf8Validate(buffer):
313+
# Cut result buffer to `maxChars` characters.
314+
ok(utf8Substr(buffer, 0, maxChars - 1).get())
290315
else:
291-
var cur, old: Termios
292-
if tcGetAttr(STDIN_FILENO, addr cur) != cint(0):
293-
return err(ioLastError())
316+
err(IoErrorCode(EILSEQ))
317+
else:
318+
let bytesRead =
319+
block:
320+
var cur, old: Termios
321+
if tcGetAttr(STDIN_FILENO, addr cur) != cint(0):
322+
return err(ioLastError())
294323

295-
old = cur
296-
cur.c_lflag = cur.c_lflag and not(Cflag(ECHO))
324+
old = cur
325+
cur.c_lflag = cur.c_lflag and not(Cflag(ECHO))
297326

298-
if tcSetAttr(STDIN_FILENO, TCSADRAIN, addr(cur)) != cint(0):
299-
return err(ioLastError())
327+
if tcSetAttr(STDIN_FILENO, TCSADRAIN, addr(cur)) != cint(0):
328+
return err(ioLastError())
300329

301-
let res = read(STDIN_FILENO, cast[pointer](addr buffer[0]),
302-
len(buffer))
303-
if res < 0:
304-
let errCode = ioLastError()
305-
discard tcSetAttr(STDIN_FILENO, TCSADRAIN, addr(old))
306-
return err(errCode)
330+
let res = read(STDIN_FILENO, cast[pointer](addr buffer[0]),
331+
len(buffer))
332+
if res < 0:
333+
let errCode = ioLastError()
334+
discard tcSetAttr(STDIN_FILENO, TCSADRAIN, addr(old))
335+
return err(errCode)
307336

308-
if tcSetAttr(STDIN_FILENO, TCSADRAIN, addr(old)) != cint(0):
309-
return err(ioLastError())
310-
res
337+
if tcSetAttr(STDIN_FILENO, TCSADRAIN, addr(old)) != cint(0):
338+
return err(ioLastError())
339+
res
311340

312-
# Truncate additional bytes from buffer.
313-
buffer.setLen(min(maxBytes, bytesRead))
314-
# Trim LF in result string
315-
if len(buffer) > 0:
316-
if buffer[^1] == char(0x0A):
317-
buffer.setLen(len(buffer) - 1)
318-
ok(buffer)
341+
# Truncate additional bytes from buffer.
342+
buffer.setLen(bytesRead)
343+
344+
# Trim LF in result string
345+
if len(buffer) > 0:
346+
if buffer[^1] == char(0x0A):
347+
buffer.setLen(len(buffer) - 1)
348+
buffer.add(char(0x00))
349+
350+
# Conversion of console input into wide characters sequence.
351+
let wres = mbstowcs(uint32, buffer)
352+
if wres.isOk():
353+
# Trim wide character sequence to `maxChars` number of characters.
354+
var wbuffer = wres.get()
355+
if maxChars < len(wbuffer):
356+
wbuffer.setLen(maxChars)
357+
# Conversion of wide characters sequence to UTF-8 encoded string.
358+
let ures = wbuffer.wcharToUtf8()
359+
if ures.isOk():
360+
ok(ures.get())
361+
else:
362+
err(IoErrorCode(EILSEQ))
363+
else:
364+
err(IoErrorCode(EILSEQ))
319365

320366
proc readConsolePassword*(prompt: string,
321-
maxBytes = 32768): IoResult[string] =
322-
## Reads a password from stdin without printing it with length in bytes up to
323-
## ``maxBytes``.
367+
maxChars = 32768): IoResult[string] =
368+
## Reads a password from stdin without printing it with length in characters
369+
## up to ``maxChars``.
324370
##
325371
## This procedure supports reading of UTF-8 encoded passwords from console or
326-
## redirected pipe. But ``maxBytes`` will limit
372+
## redirected pipe.
327373
##
328374
## Before reading password ``prompt`` will be printed.
329375
##
330-
## Please note that ``maxBytes`` should be in range (0, 32768].
331-
doAssert(maxBytes > 0 and maxBytes <= 32768,
332-
"maxBytes should be integer in (0, 32768]")
376+
## Please note that ``maxChars`` should be in range (0, 32768].
377+
doAssert(maxChars > 0 and maxChars <= 32768,
378+
"maxChars should be integer in (0, 32768]")
333379
? writeConsoleOutput(prompt)
334-
let res = ? readConsoleInput(maxBytes)
380+
let res = ? readConsoleInput(maxChars)
335381
# `\p` is platform specific newline: CRLF on Windows, LF on Unix
336382
? writeConsoleOutput("\p")
337383
ok(res)
384+
385+
when isMainModule:
386+
echo readConsolePassword("Enter password: ", 4)

0 commit comments

Comments
 (0)