Skip to content

Commit f26f75c

Browse files
committed
Merge branch 'upstream-master'
2 parents 365dabb + c6a8c30 commit f26f75c

26 files changed

+1252
-1170
lines changed

CMakeLists.txt

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,19 @@
1-
cmake_minimum_required(VERSION 2.8.9)
2-
3-
# prevent ugly developer warnings because version is set directly, not through project()
4-
# it should be redone properly by using VERSION in project() if on CMake 3.x
5-
if(CMAKE_MAJOR_VERSION GREATER 2)
6-
cmake_policy(SET CMP0048 OLD)
7-
endif()
8-
1+
cmake_minimum_required(VERSION 3.0)
92
project(cmark-gfm)
103

4+
set(PROJECT_VERSION_MAJOR 0)
5+
set(PROJECT_VERSION_MINOR 29)
6+
set(PROJECT_VERSION_PATCH 0)
7+
set(PROJECT_VERSION_GFM 0)
8+
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM})
9+
1110
include("FindAsan.cmake")
1211
include("CheckFileOffsetBits.cmake")
1312

1413
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
1514
message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
1615
endif()
1716

18-
set(PROJECT_NAME "cmark-gfm")
19-
20-
set(PROJECT_VERSION_MAJOR 0)
21-
set(PROJECT_VERSION_MINOR 28)
22-
set(PROJECT_VERSION_PATCH 3)
23-
set(PROJECT_VERSION_GFM 20)
24-
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM} )
25-
2617
option(CMARK_TESTS "Build cmark-gfm tests and enable testing" ON)
2718
option(CMARK_STATIC "Build static libcmark-gfm library" ON)
2819
option(CMARK_SHARED "Build shared libcmark-gfm library" ON)

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ prof:
7575
afl:
7676
@[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; }
7777
mkdir -p $(BUILDDIR)
78-
cd $(BUILDDIR) && cmake .. -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang
78+
cd $(BUILDDIR) && cmake .. -DCMARK_TESTS=0 -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang
7979
$(MAKE)
8080
$(AFL_PATH)/afl-fuzz \
8181
-i test/afl_test_cases \

README.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,13 @@ be found in the man pages in the `man` subdirectory.
163163
Security
164164
--------
165165

166-
By default, the library will scrub raw HTML and potentially dangerous links
167-
(`javascript:`, `vbscript:`, `data:`, `file:`). Please note this is the
168-
_opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark)
169-
library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`.
170-
171-
To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe` with the
172-
command line program). If doing so, we recommend you use a HTML sanitizer
173-
specific to your needs to protect against [XSS
166+
By default, the library will scrub raw HTML and potentially
167+
dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).
168+
169+
To allow these, use the option `CMARK_OPT_UNSAFE` (or
170+
`--unsafe` with the command line program). If doing so, we
171+
recommend you use an HTML sanitizer specific to your needs to
172+
protect against [XSS
174173
attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
175174

176175
Contributing

man/man3/cmark-gfm.3

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.TH cmark-gfm 3 "October 17, 2018" "LOCAL" "Library Functions Manual"
1+
.TH cmark-gfm 3 "April 08, 2019" "LOCAL" "Library Functions Manual"
22
.SH
33
NAME
44
.PP
@@ -852,6 +852,36 @@ Include a \f[C]data\-sourcepos\f[] attribute on all block elements.
852852
.PP
853853
Render \f[C]softbreak\f[] elements as hard line breaks.
854854

855+
.PP
856+
.nf
857+
\fC
858+
.RS 0n
859+
#define CMARK_OPT_SAFE (1 << 3)
860+
.RE
861+
\f[]
862+
.fi
863+
864+
.PP
865+
\f[C]CMARK_OPT_SAFE\f[] is defined here for API compatibility, but it no
866+
longer has any effect. "Safe" mode is now the default: set
867+
\f[C]CMARK_OPT_UNSAFE\f[] to disable it.
868+
869+
.PP
870+
.nf
871+
\fC
872+
.RS 0n
873+
#define CMARK_OPT_UNSAFE (1 << 17)
874+
.RE
875+
\f[]
876+
.fi
877+
878+
.PP
879+
Render raw HTML and unsafe links (\f[C]javascript:\f[],
880+
\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
881+
\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
882+
\f[C]image/webp\f[] mime types). By default, raw HTML is replaced by a
883+
placeholder HTML comment. Unsafe links are replaced by empty strings.
884+
855885
.PP
856886
.nf
857887
\fC
@@ -979,23 +1009,6 @@ Use style attributes to align table cells instead of align attributes.
9791009
Include the remainder of the info string in code blocks in a separate
9801010
attribute.
9811011

982-
.PP
983-
.nf
984-
\fC
985-
.RS 0n
986-
#define CMARK_OPT_UNSAFE (1 << 17)
987-
.RE
988-
\f[]
989-
.fi
990-
991-
.PP
992-
Allow raw HTML and unsafe links, \f[C]javascript:\f[],
993-
\f[C]vbscript:\f[], \f[C]file:\f[], and all \f[C]data:\f[] URLs \-\- by
994-
default, only \f[C]image/png\f[], \f[C]image/gif\f[],
995-
\f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types are allowed.
996-
Without this option, raw HTML is replaced by a placeholder HTML comment,
997-
and unsafe links are replaced by empty strings.
998-
9991012
.SS
10001013
Version information
10011014

src/CMakeLists.txt

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ include("../CheckFileOffsetBits.cmake")
7878
CHECK_FILE_OFFSET_BITS()
7979

8080
add_executable(${PROGRAM} ${PROGRAM_SOURCES})
81-
add_compiler_export_flags()
8281

8382
if(CMARK_SHARED)
8483
target_link_libraries(${PROGRAM} libcmark-gfm-extensions libcmark-gfm)
@@ -97,14 +96,9 @@ set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")
9796
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
9897
set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg")
9998

100-
add_definitions(-DLIBDIR=\"${CMAKE_BINARY_DIR}\")
101-
102-
if (${CMAKE_VERSION} VERSION_GREATER "1.8")
103-
set(CMAKE_C_VISIBILITY_PRESET hidden)
104-
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
105-
elseif(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
106-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
107-
endif ()
99+
# -fvisibility=hidden
100+
set(CMAKE_C_VISIBILITY_PRESET hidden)
101+
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
108102

109103
if (CMARK_SHARED)
110104
add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES})

src/blocks.c

Lines changed: 89 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
3636
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
3737
}
3838

39+
static bool S_last_line_checked(const cmark_node *node) {
40+
return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
41+
}
42+
3943
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
4044
return (cmark_node_type)node->type;
4145
}
@@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
4751
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
4852
}
4953

54+
static void S_set_last_line_checked(cmark_node *node) {
55+
node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
56+
}
57+
5058
static CMARK_INLINE bool S_is_line_end_char(char c) {
5159
return (c == '\n' || c == '\r');
5260
}
@@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
121129
parser->root = document;
122130
parser->current = document;
123131

124-
parser->last_buffer_ended_with_cr = false;
125-
126132
parser->syntax_extensions = saved_exts;
127133
parser->inline_syntax_extensions = saved_inline_exts;
128134
parser->options = saved_options;
@@ -234,26 +240,43 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
234240

235241
// Check to see if a node ends with a blank line, descending
236242
// if needed into lists and sublists.
237-
static bool ends_with_blank_line(cmark_node *node) {
238-
cmark_node *cur = node;
239-
while (cur != NULL) {
240-
if (S_last_line_blank(cur)) {
241-
return true;
242-
}
243-
if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) {
244-
cur = cur->last_child;
245-
} else {
246-
cur = NULL;
247-
}
243+
static bool S_ends_with_blank_line(cmark_node *node) {
244+
if (S_last_line_checked(node)) {
245+
return(S_last_line_blank(node));
246+
} else if ((S_type(node) == CMARK_NODE_LIST ||
247+
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
248+
S_set_last_line_checked(node);
249+
return(S_ends_with_blank_line(node->last_child));
250+
} else {
251+
S_set_last_line_checked(node);
252+
return (S_last_line_blank(node));
248253
}
249-
return false;
254+
}
255+
256+
// returns true if content remains after link defs are resolved.
257+
static bool resolve_reference_link_definitions(
258+
cmark_parser *parser,
259+
cmark_node *b) {
260+
bufsize_t pos;
261+
cmark_strbuf *node_content = &b->content;
262+
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
263+
while (chunk.len && chunk.data[0] == '[' &&
264+
(pos = cmark_parse_reference_inline(parser->mem, &chunk,
265+
parser->refmap))) {
266+
267+
chunk.data += pos;
268+
chunk.len -= pos;
269+
}
270+
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
271+
return !is_blank(&b->content, 0);
250272
}
251273

252274
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
253275
bufsize_t pos;
254276
cmark_node *item;
255277
cmark_node *subitem;
256278
cmark_node *parent;
279+
bool has_content;
257280

258281
parent = b->parent;
259282
assert(b->flags &
@@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
283306
switch (S_type(b)) {
284307
case CMARK_NODE_PARAGRAPH:
285308
{
286-
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
287-
while (chunk.len && chunk.data[0] == '[' &&
288-
(pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
289-
290-
chunk.data += pos;
291-
chunk.len -= pos;
292-
}
293-
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
294-
if (is_blank(node_content, 0)) {
309+
has_content = resolve_reference_link_definitions(parser, b);
310+
if (!has_content) {
295311
// remove blank node (former reference def)
296312
cmark_node_free(b);
297313
}
@@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
343359
// spaces between them:
344360
subitem = item->first_child;
345361
while (subitem) {
346-
if (ends_with_blank_line(subitem) && (item->next || subitem->next)) {
362+
if ((item->next || subitem->next) &&
363+
S_ends_with_blank_line(subitem)) {
347364
b->as.list.tight = false;
348365
break;
349366
}
@@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
748765
}
749766
}
750767

768+
// Check for thematic break. On failure, return 0 and update
769+
// thematic_break_kill_pos with the index at which the
770+
// parse fails. On success, return length of match.
771+
// "...three or more hyphens, asterisks,
772+
// or underscores on a line by themselves. If you wish, you may use
773+
// spaces between the hyphens or asterisks."
774+
static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
775+
bufsize_t offset) {
776+
bufsize_t i;
777+
char c;
778+
char nextc = '\0';
779+
int count;
780+
i = offset;
781+
c = peek_at(input, i);
782+
if (!(c == '*' || c == '_' || c == '-')) {
783+
parser->thematic_break_kill_pos = i;
784+
return 0;
785+
}
786+
count = 1;
787+
while ((nextc = peek_at(input, ++i))) {
788+
if (nextc == c) {
789+
count++;
790+
} else if (nextc != ' ' && nextc != '\t') {
791+
break;
792+
}
793+
}
794+
if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
795+
return (i - offset) + 1;
796+
} else {
797+
parser->thematic_break_kill_pos = i;
798+
return 0;
799+
}
800+
}
801+
751802
// Find first nonspace character from current offset, setting
752803
// parser->first_nonspace, parser->first_nonspace_column,
753804
// parser->indent, and parser->blank. Does not advance parser->offset.
@@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
10401091
bufsize_t matched = 0;
10411092
int lev = 0;
10421093
bool save_partially_consumed_tab;
1094+
bool has_content;
10431095
int save_offset;
10441096
int save_column;
10451097

@@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11121164
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
11131165
(lev =
11141166
scan_setext_heading_line(input, parser->first_nonspace))) {
1115-
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
1116-
(*container)->as.heading.level = lev;
1117-
(*container)->as.heading.setext = true;
1118-
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1167+
// finalize paragraph, resolving reference links
1168+
has_content = resolve_reference_link_definitions(parser, *container);
1169+
1170+
if (has_content) {
1171+
1172+
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
1173+
(*container)->as.heading.level = lev;
1174+
(*container)->as.heading.setext = true;
1175+
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
1176+
}
11191177
} else if (!indented &&
11201178
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
1121-
(matched = scan_thematic_break(input, parser->first_nonspace))) {
1179+
(parser->thematic_break_kill_pos <= parser->first_nonspace) &&
1180+
(matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
11221181
// it's only now that we know the line is not part of a setext heading:
11231182
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
11241183
parser->first_nonspace + 1);
@@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13771436
parser->column = 0;
13781437
parser->first_nonspace = 0;
13791438
parser->first_nonspace_column = 0;
1439+
parser->thematic_break_kill_pos = 0;
13801440
parser->indent = 0;
13811441
parser->blank = false;
13821442
parser->partially_consumed_tab = false;

src/buffer.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "config.h"
1111
#include "cmark_ctype.h"
1212
#include "buffer.h"
13-
#include "memory.h"
1413

1514
/* Used as default value for cmark_strbuf->ptr so that people can always
1615
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.

src/chunk.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include <assert.h>
77
#include "cmark-gfm.h"
88
#include "buffer.h"
9-
#include "memory.h"
109
#include "cmark_ctype.h"
1110

1211
#define CMARK_CHUNK_EMPTY \

0 commit comments

Comments
 (0)