Skip to content

Commit 6208f86

Browse files
committed
xmllint: Support compressed input from stdin
Another regression related to reading from stdin. Making a "-" filename read from stdin was deeply baked into the core IO code but is inherently insecure. I really want to re-enable this dangerous feature as sparingly as possible. Add a new hidden parser option to make xmllint work. This will likely turn into a public, opt-in option later. Allow compressed stdin in xmlReadFile to support xmlstarlet and older versions of xsltproc. So far, these are the only known command-line tools that rely on "-" meaning stdin.
1 parent 7d4df58 commit 6208f86

File tree

6 files changed

+121
-73
lines changed

6 files changed

+121
-73
lines changed

include/private/io.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ XML_HIDDEN xmlParserInputBufferPtr
2424
xmlNewInputBufferMemory(const void *mem, size_t size, int flags,
2525
xmlCharEncoding enc);
2626

27+
XML_HIDDEN int
28+
xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int unzip);
29+
2730
#ifdef LIBXML_OUTPUT_ENABLED
2831
XML_HIDDEN xmlOutputBufferPtr
2932
xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);

include/private/parser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix);
9090

9191
#define XML_INPUT_BUF_STATIC (1u << 1)
9292
#define XML_INPUT_BUF_ZERO_TERMINATED (1u << 2)
93+
#define XML_INPUT_UNZIP (1u << 3)
94+
95+
/* Internal parser option */
96+
#define XML_PARSE_UNZIP (1 << 24)
9397

9498
XML_HIDDEN xmlParserInputPtr
9599
xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,

parser.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13890,7 +13890,8 @@ xmlReadFile(const char *filename, const char *encoding, int options)
1389013890
* should be removed at some point.
1389113891
*/
1389213892
if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13893-
input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, encoding, 0);
13893+
input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, encoding,
13894+
XML_INPUT_UNZIP);
1389413895
else
1389513896
input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
1389613897

@@ -14141,14 +14142,18 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
1414114142
const char *URL, const char *encoding, int options)
1414214143
{
1414314144
xmlParserInputPtr input;
14145+
int inputFlags;
1414414146

1414514147
if (ctxt == NULL)
1414614148
return(NULL);
1414714149

1414814150
xmlCtxtReset(ctxt);
1414914151
xmlCtxtUseOptions(ctxt, options);
1415014152

14151-
input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14153+
inputFlags = 0;
14154+
if (options & XML_PARSE_UNZIP)
14155+
inputFlags |= XML_INPUT_UNZIP;
14156+
input = xmlNewInputFd(ctxt, URL, fd, encoding, inputFlags);
1415214157

1415314158
return(xmlCtxtParseDocument(ctxt, input));
1415414159
}

parserInternals.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,18 +1715,23 @@ xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url,
17151715
*/
17161716
xmlParserInputPtr
17171717
xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url,
1718-
int fd, const char *encoding, int flags ATTRIBUTE_UNUSED) {
1718+
int fd, const char *encoding, int flags) {
17191719
xmlParserInputBufferPtr buf;
17201720

17211721
if ((ctxt == NULL) || (fd < 0))
17221722
return(NULL);
17231723

1724-
buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1724+
buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
17251725
if (buf == NULL) {
17261726
xmlCtxtErrMemory(ctxt);
17271727
return(NULL);
17281728
}
17291729

1730+
if (xmlInputFromFd(buf, fd, (flags & XML_INPUT_UNZIP) != 0) < 0) {
1731+
xmlFreeParserInputBuffer(buf);
1732+
return(NULL);
1733+
}
1734+
17301735
return(xmlNewInputInternal(ctxt, buf, url, encoding));
17311736
}
17321737

xmlIO.c

Lines changed: 93 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,65 +1158,36 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) {
11581158
return(1);
11591159
}
11601160

1161-
/**
1162-
* xmlInputDefaultOpen:
1163-
* @buf: input buffer to be filled
1164-
* @filename: filename or URI
1165-
*
1166-
* Returns an xmlParserErrors code.
1167-
*/
1168-
static int
1169-
xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename) {
1170-
int ret;
1171-
int fd;
1172-
1173-
#ifdef LIBXML_FTP_ENABLED
1174-
if (xmlIOFTPMatch(filename)) {
1175-
buf->context = xmlIOFTPOpen(filename);
1176-
1177-
if (buf->context != NULL) {
1178-
buf->readcallback = xmlIOFTPRead;
1179-
buf->closecallback = xmlIOFTPClose;
1180-
return(XML_ERR_OK);
1181-
}
1182-
}
1183-
#endif /* LIBXML_FTP_ENABLED */
1184-
1185-
#ifdef LIBXML_HTTP_ENABLED
1186-
if (xmlIOHTTPMatch(filename)) {
1187-
buf->context = xmlIOHTTPOpen(filename);
1188-
1189-
if (buf->context != NULL) {
1190-
buf->readcallback = xmlIOHTTPRead;
1191-
buf->closecallback = xmlIOHTTPClose;
1192-
return(XML_ERR_OK);
1193-
}
1194-
}
1195-
#endif /* LIBXML_HTTP_ENABLED */
1161+
int
1162+
xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int unzip) {
1163+
int copy;
11961164

1197-
if (!xmlFileMatch(filename))
1198-
return(XML_IO_ENOENT);
1165+
(void) unzip;
11991166

12001167
#ifdef LIBXML_LZMA_ENABLED
1201-
{
1168+
if (unzip) {
12021169
xzFile xzStream;
1170+
off_t pos;
12031171

1204-
ret = xmlFdOpen(filename, 0, &fd);
1205-
if (ret != XML_ERR_OK)
1206-
return(ret);
1172+
pos = lseek(fd, 0, SEEK_CUR);
12071173

1208-
xzStream = __libxml2_xzdopen(filename, fd, "rb");
1174+
copy = dup(fd);
1175+
if (copy == -1)
1176+
return(xmlIOErr(0, "dup()"));
1177+
1178+
xzStream = __libxml2_xzdopen("?", copy, "rb");
12091179

12101180
if (xzStream == NULL) {
1211-
close(fd);
1181+
close(copy);
12121182
} else {
1213-
/*
1214-
* Non-regular files like pipes can't be reopened.
1215-
* If a file isn't seekable, we pipe uncompressed
1216-
* input through xzlib.
1217-
*/
1218-
if ((lseek(fd, 0, SEEK_CUR) < 0) ||
1219-
(__libxml2_xzcompressed(xzStream) > 0)) {
1183+
if ((__libxml2_xzcompressed(xzStream) > 0) ||
1184+
/* Try to rewind if not xz/lzma compressed */
1185+
(pos < 0) ||
1186+
(lseek(fd, pos, SEEK_SET) < 0)) {
1187+
/*
1188+
* If a file isn't seekable, we pipe uncompressed
1189+
* input through xzlib.
1190+
*/
12201191
buf->context = xzStream;
12211192
buf->readcallback = xmlXzfileRead;
12221193
buf->closecallback = xmlXzfileClose;
@@ -1231,25 +1202,29 @@ xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename) {
12311202
#endif /* LIBXML_LZMA_ENABLED */
12321203

12331204
#ifdef LIBXML_ZLIB_ENABLED
1234-
{
1205+
if (unzip) {
12351206
gzFile gzStream;
1207+
off_t pos;
12361208

1237-
ret = xmlFdOpen(filename, 0, &fd);
1238-
if (ret != XML_ERR_OK)
1239-
return(ret);
1209+
pos = lseek(fd, 0, SEEK_CUR);
1210+
1211+
copy = dup(fd);
1212+
if (copy == -1)
1213+
return(xmlIOErr(0, "dup()"));
12401214

1241-
gzStream = gzdopen(fd, "rb");
1215+
gzStream = gzdopen(copy, "rb");
12421216

12431217
if (gzStream == NULL) {
1244-
close(fd);
1218+
close(copy);
12451219
} else {
1246-
/*
1247-
* Non-regular files like pipes can't be reopened.
1248-
* If a file isn't seekable, we pipe uncompressed
1249-
* input through zlib.
1250-
*/
1251-
if ((lseek(fd, 0, SEEK_CUR) < 0) ||
1252-
(gzdirect(gzStream) == 0)) {
1220+
if ((gzdirect(gzStream) == 0) ||
1221+
/* Try to rewind if not gzip compressed */
1222+
(pos < 0) ||
1223+
(lseek(fd, pos, SEEK_SET) < 0)) {
1224+
/*
1225+
* If a file isn't seekable, we pipe uncompressed
1226+
* input through zlib.
1227+
*/
12531228
buf->context = gzStream;
12541229
buf->readcallback = xmlGzfileRead;
12551230
buf->closecallback = xmlGzfileClose;
@@ -1263,16 +1238,67 @@ xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename) {
12631238
}
12641239
#endif /* LIBXML_ZLIB_ENABLED */
12651240

1266-
ret = xmlFdOpen(filename, 0, &fd);
1267-
if (ret != XML_ERR_OK)
1268-
return(ret);
1241+
copy = dup(fd);
1242+
if (copy == -1)
1243+
return(xmlIOErr(0, "dup()"));
12691244

1270-
buf->context = (void *) (ptrdiff_t) fd;
1245+
buf->context = (void *) (ptrdiff_t) copy;
12711246
buf->readcallback = xmlFdRead;
12721247
buf->closecallback = xmlFdClose;
1248+
12731249
return(XML_ERR_OK);
12741250
}
12751251

1252+
/**
1253+
* xmlInputDefaultOpen:
1254+
* @buf: input buffer to be filled
1255+
* @filename: filename or URI
1256+
*
1257+
* Returns an xmlParserErrors code.
1258+
*/
1259+
static int
1260+
xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename) {
1261+
int ret;
1262+
int fd;
1263+
1264+
#ifdef LIBXML_FTP_ENABLED
1265+
if (xmlIOFTPMatch(filename)) {
1266+
buf->context = xmlIOFTPOpen(filename);
1267+
1268+
if (buf->context != NULL) {
1269+
buf->readcallback = xmlIOFTPRead;
1270+
buf->closecallback = xmlIOFTPClose;
1271+
return(XML_ERR_OK);
1272+
}
1273+
}
1274+
#endif /* LIBXML_FTP_ENABLED */
1275+
1276+
#ifdef LIBXML_HTTP_ENABLED
1277+
if (xmlIOHTTPMatch(filename)) {
1278+
buf->context = xmlIOHTTPOpen(filename);
1279+
1280+
if (buf->context != NULL) {
1281+
buf->readcallback = xmlIOHTTPRead;
1282+
buf->closecallback = xmlIOHTTPClose;
1283+
return(XML_ERR_OK);
1284+
}
1285+
}
1286+
#endif /* LIBXML_HTTP_ENABLED */
1287+
1288+
if (!xmlFileMatch(filename))
1289+
return(XML_IO_ENOENT);
1290+
1291+
ret = xmlFdOpen(filename, 0, &fd);
1292+
if (ret != XML_ERR_OK)
1293+
return(ret);
1294+
1295+
ret = xmlInputFromFd(buf, fd, /* unzip */ 1);
1296+
1297+
close(fd);
1298+
1299+
return(ret);
1300+
}
1301+
12761302
#ifdef LIBXML_OUTPUT_ENABLED
12771303
/**
12781304
* xmlOutputDefaultOpen:

xmllint.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@
9595
#define STDIN_FILENO 0
9696
#endif
9797

98+
/* Internal parser option */
99+
#define XML_PARSE_UNZIP (1 << 24)
100+
98101
typedef enum {
99102
XMLLINT_RETURN_OK = 0, /* No error */
100103
XMLLINT_ERR_UNCLASS = 1, /* Unclassified */
@@ -1648,7 +1651,8 @@ testSAX(const char *filename) {
16481651
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
16491652

16501653
if (strcmp(filename, "-") == 0)
1651-
xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
1654+
xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL,
1655+
options | XML_PARSE_UNZIP);
16521656
else
16531657
xmlCtxtReadFile(ctxt, filename, NULL, options);
16541658

@@ -2333,7 +2337,8 @@ parseFile(const char *filename, xmlParserCtxtPtr rectxt) {
23332337
#endif
23342338
} else {
23352339
if (strcmp(filename, "-") == 0)
2336-
doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
2340+
doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL,
2341+
options | XML_PARSE_UNZIP);
23372342
else
23382343
doc = xmlCtxtReadFile(ctxt, filename, NULL, options);
23392344
}

0 commit comments

Comments
 (0)