Skip to content

Commit c1c3f25

Browse files
committed
syslogd: unescape Linux /dev/kmsg messages before sanitization
Linux's /dev/kmsg interface escapes all non-printable characters and backslashes using C-style hex encoding (\xHH format). This prevents the recent UTF-8 sanitization improvements from working correctly, since UTF-8 sequences arrive as escaped text like "\xe2\x80\x94" rather than as actual bytes; see [1] for details. This commit implements "unescaping" of the kernel's C-style format before applying the UTF-8-aware sanitization, ensuring that UTF-8 content in kernel messages is also properly preserved when using the -8 flag. [1]: https://www.kernel.org/doc/Documentation/ABI/testing/dev-kmsg Signed-off-by: Joachim Wiberg <[email protected]>
1 parent 1037229 commit c1c3f25

File tree

1 file changed

+77
-4
lines changed

1 file changed

+77
-4
lines changed

src/syslogd.c

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ static int KernLog = 1; /* Track kernel logs by default */
155155
static int KeepKernFac; /* Keep remotely logged kernel facility */
156156
static int KeepKernTime; /* Keep kernel timestamp, even after initial read */
157157
static int KeepKernConsole; /* Keep kernel logging to console */
158+
static int IsLinuxKmsg; /* Set if reading from Linux /dev/kmsg */
158159

159160
static int rotate_opt; /* Set if command line option has been given (wins) */
160161
static off_t RotateSz = 0; /* Max file size (bytes) before rotating, disabled by default */
@@ -651,8 +652,10 @@ int main(int argc, char *argv[])
651652
_PATH_KLOG);
652653
else
653654
kern_console_off();
654-
} else
655+
} else {
656+
IsLinuxKmsg = 1;
655657
kern_console_off();
658+
}
656659
}
657660
no_klogd:
658661
consfile.f_type = F_CONSOLE;
@@ -1062,6 +1065,61 @@ utf8_valid(const unsigned char *in, size_t len)
10621065
return 1;
10631066
}
10641067

1068+
/*
 * Returns the numeric value (0-15) of the hexadecimal digit c,
 * or -1 if c is not a hexadecimal digit.
 */
static int kmsg_hexval(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Unescapes, in place, Linux /dev/kmsg messages that use C-style hex
 * encoding.  Converts "\xHH" sequences back to bytes and "\\" back to
 * "\".  The result is never longer than the input and is always NUL
 * terminated.
 *
 * Anything that is not a well-formed escape is copied through as-is:
 * a lone or trailing backslash, "\x" followed by fewer than two hex
 * digits, or "\x" followed by non-hex characters.
 *
 * "\x00" is deliberately left in its escaped form.  Decoding it would
 * embed a NUL byte in the C string, which silently truncates the
 * message for every later string consumer.
 *
 * Returns the new length of the unescaped string, 0 if msg is NULL.
 */
static size_t kmsg_unescape(char *msg)
{
	char *src, *dst;
	int hi, lo;

	if (!msg)
		return 0;

	src = dst = msg;
	while (*src) {
		if (src[0] == '\\' && src[1] == '\\') {
			/* Decode \\ to \ */
			*dst++ = '\\';
			src += 2;
			continue;
		}

		/* src[2] is tested before src[3], so we never read past the NUL */
		if (src[0] == '\\' && src[1] == 'x' && src[2] && src[3]) {
			hi = kmsg_hexval((unsigned char)src[2]);
			lo = kmsg_hexval((unsigned char)src[3]);

			/* Decode \xHH, but never to an embedded NUL */
			if (hi >= 0 && lo >= 0 && (hi | lo) != 0) {
				*dst++ = (char)((hi << 4) | lo);
				src += 4;
				continue;
			}
		}

		/* Not a recognized escape, copy literally */
		*dst++ = *src++;
	}
	*dst = '\0';

	return (size_t)(dst - msg);
}
1122+
10651123
/*
10661124
* Removes characters from log messages that are unsafe to display.
10671125
* Preserves valid UTF-8 sequences, including BOM, with -8 flag.
@@ -1773,9 +1831,24 @@ void printsys(char *msg)
17731831
parsemsg_rfc3164_app_name_procid(&p, &buffer.app_name, &buffer.proc_id);
17741832

17751833
q = lp;
1776-
while (*p != '\0' && (c = *p++) != '\n' && q < &line[MAXLINE])
1777-
*q++ = c;
1778-
*q = '\0';
1834+
if (IsLinuxKmsg) {
1835+
char tmp[MAXLINE + 1];
1836+
char *t = tmp;
1837+
1838+
while (*p != '\0' && *p != '\n' && t < &tmp[MAXLINE])
1839+
*t++ = *p++;
1840+
*t = '\0';
1841+
1842+
/* Unescape \xHH sequences and \\ */
1843+
kmsg_unescape(tmp);
1844+
1845+
/* Sanitize the unescaped message with UTF-8 support */
1846+
parsemsg_remove_unsafe_characters(tmp, lp, MAXLINE);
1847+
} else {
1848+
while (*p != '\0' && (c = *p++) != '\n' && q < &line[MAXLINE])
1849+
*q++ = c;
1850+
*q = '\0';
1851+
}
17791852

17801853
logmsg(&buffer);
17811854
}

0 commit comments

Comments
 (0)