Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ some extensions or allowing some deviations from the specification.

* With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are
disabled.

* With the flag `MD_FLAG_REDDITSLASHDETECTION`, Reddit subreddit and user links such as r/test or /u/me are detected.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename the flag for consistency with the other flags. No other flag uses the word "detection". I am not a Reddit user, so I don't know whether something like MD_FLAG_REDDITAUTOLINKS is the right name, but it could give you an idea.



## Input/Output Encoding
Expand Down
7 changes: 7 additions & 0 deletions md2html/md2html.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ static const option cmdline_options[] = {
{ "version", 'v', 'v', OPTION_ARG_NONE },
{ "commonmark", 0, 'c', OPTION_ARG_NONE },
{ "github", 0, 'g', OPTION_ARG_NONE },
{ "reddit", 0, 'R', OPTION_ARG_NONE },
{ "fverbatim-entities", 0, 'E', OPTION_ARG_NONE },
{ "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE },
{ "fpermissive-url-autolinks", 0, 'U', OPTION_ARG_NONE },
Expand All @@ -208,6 +209,7 @@ static const option cmdline_options[] = {
{ "fcollapse-whitespace", 0, 'W', OPTION_ARG_NONE },
{ "ftables", 0, 'T', OPTION_ARG_NONE },
{ "fstrikethrough", 0, 'S', OPTION_ARG_NONE },
{ "freddit-autolinks", 0, 'r', OPTION_ARG_NONE },
{ 0 }
};

Expand All @@ -229,6 +231,7 @@ usage(void)
"(note these are equivalent to some combinations of flags below)\n"
" --commonmark CommonMark (this is default)\n"
" --github Github Flavored Markdown\n"
" --reddit Reddit's dialect of Markdown\n"
"\n"
"Markdown extension options:\n"
" --fcollapse-whitespace\n"
Expand All @@ -253,6 +256,8 @@ usage(void)
" --fno-html-spans\n"
" Disable raw HTML spans\n"
" --fno-html Same as --fno-html-blocks --fno-html-spans\n"
" --freddit-autolinks\n"
" Enable Reddit autolinks of the form /u/x, /r/x, u/x, r/x\n"
" --ftables Enable tables\n"
" --fstrikethrough Enable strikethrough spans\n"
);
Expand Down Expand Up @@ -288,6 +293,7 @@ cmdline_callback(int opt, char const* value, void* data)

case 'c': parser_flags = MD_DIALECT_COMMONMARK; break;
case 'g': parser_flags = MD_DIALECT_GITHUB; break;
case 'R': parser_flags = MD_DIALECT_REDDITPOST; break;

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a minor detail, but please swap 'r' and 'R' so that there is some consistency: Lower case for dialect options, upper case for extensions.

case 'E': renderer_flags |= MD_RENDER_FLAG_VERBATIM_ENTITIES; break;
case 'A': parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
Expand All @@ -302,6 +308,7 @@ cmdline_callback(int opt, char const* value, void* data)
case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
case 'T': parser_flags |= MD_FLAG_TABLES; break;
case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break;
case 'r': parser_flags |= MD_FLAG_REDDITSLASHDETECTION;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing break; here, isn't it?


default:
fprintf(stderr, "Illegal option: %s\n", value);
Expand Down
168 changes: 144 additions & 24 deletions md4c/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -2696,7 +2696,8 @@ md_build_mark_char_map(MD_CTX* ctx)
ctx->mark_char_map['!'] = 1;
ctx->mark_char_map[']'] = 1;
ctx->mark_char_map['\0'] = 1;

if (ctx->r.flags & MD_FLAG_REDDITSLASHDETECTION)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uff, indentation using tab chars here and in some other places of your patch. Please use just spaces as in the rest of the source file(s).

ctx->mark_char_map['/'] = 1;
if(ctx->r.flags & MD_FLAG_STRIKETHROUGH)
ctx->mark_char_map['~'] = 1;

Expand Down Expand Up @@ -2912,11 +2913,48 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
/* Push a dummy as a reserve for a closer. */
PUSH_MARK('D', off, off, 0);
}

off++;
continue;
}

/* A potential permissive Reddit autolink.
 *
 * Two spellings are recognized, both followed by a name made of one
 * alphanumeric character and then any run of alphanumerics or '_':
 *   - short form "u/name" or "r/name": the 'u'/'r' sits right before this
 *     slash and is either at the line start or preceded by whitespace or
 *     punctuation (but not by another '/');
 *   - long form "/u/name" or "/r/name": this slash starts the prefix.
 *
 * On a match, a single already-resolved '/' mark covering the whole
 * autolink is pushed and scanning resumes right after the name. */
if(ch == _T('/')) {
    OFF mark_beg = off;
    OFF name_beg = off;     /* Stays equal to off when nothing matched. */

    if(line->beg + 1 <= off && (CH(off - 1) == 'u' || CH(off - 1) == 'r') &&
       (line->beg + 1 == off ||
        (CH(off - 2) != '/' && (ISUNICODEPUNCTBEFORE(off - 1) || ISUNICODEWHITESPACE(off - 2)))) &&
       line->end > off + 1 && ISALNUM(off + 1))
    {
        /* u/something or r/something */
        mark_beg = off - 1;
        name_beg = off + 1;
    }
    else if(line->end > off + 3 && CH(off + 2) == '/' &&
            (CH(off + 1) == 'u' || CH(off + 1) == 'r') &&
            ISALNUM(off + 3))
    {
        /* /u/something or /r/something */
        mark_beg = off;
        name_beg = off + 3;
    }

    if(name_beg != off) {
        /* The first name character was validated above. Consume the rest,
         * stopping strictly before line->end: the character at line->end
         * is not part of the line (and may lie past the end of the input
         * on the last line), so it must not be read. */
        OFF index = name_beg + 1;

        while(index < line->end && (ISALNUM(index) || CH(index) == '_'))
            index++;

        PUSH_MARK('/', mark_beg, index, MD_MARK_RESOLVED);
        off = index;
    } else {
        off++;
    }
    continue;
}

/* A potential permissive URL autolink. */
if(ch == _T(':')) {
static struct {
Expand Down Expand Up @@ -3605,6 +3643,9 @@ md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
OFF off = opener->end;
int seen_dot = FALSE;
int seen_underscore_or_hyphen[2] = { FALSE, FALSE };
if (opener->end == opener->beg) {
opener->ch = '/';
}

/* Check for domain. */
while(off < ctx->size) {
Expand Down Expand Up @@ -3711,7 +3752,6 @@ md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
closer->end = end;
md_resolve_range(ctx, NULL, mark_index, closer_index);
}

static inline void
md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
int mark_beg, int mark_end, const CHAR* mark_chars)
Expand Down Expand Up @@ -3752,6 +3792,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
case '_': md_analyze_underscore(ctx, i); break;
case '~': md_analyze_tilde(ctx, i); break;
case '.': /* Pass through. */
case '/': /* Pass through */
case ':': md_analyze_permissive_url_autolink(ctx, i); break;
case '@': md_analyze_permissive_email_autolink(ctx, i); break;
}
Expand Down Expand Up @@ -3965,27 +4006,106 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
case '@': /* Permissive e-mail autolink. */
case ':': /* Permissive URL autolink. */
case '.': /* Permissive WWW autolink. */
{
const MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
const MD_MARK* closer = &ctx->marks[opener->next];
const CHAR* dest = STR(opener->end);
SZ dest_size = closer->beg - opener->end;

if(opener->ch == '@' || opener->ch == '.') {
dest_size += 7;
MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
memcpy(ctx->buffer,
(opener->ch == '@' ? _T("mailto:") : _T("http://")),
7 * sizeof(CHAR));
memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
dest = ctx->buffer;
}

MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
break;
}

{
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove the bogus blank lines in the code below?


const MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);

const MD_MARK* closer = &ctx->marks[opener->next];

const CHAR* dest = STR(opener->end);

SZ dest_size = closer->beg - opener->end;



if (opener->ch == '@' || opener->ch == '.') {

dest_size += 7;

MD_TEMP_BUFFER(dest_size * sizeof(CHAR));

memcpy(ctx->buffer,

(opener->ch == '@' ? _T("mailto:") : _T("http://")),

7 * sizeof(CHAR));

memcpy(ctx->buffer + 7, dest, (dest_size - 7) * sizeof(CHAR));

dest = ctx->buffer;

}



MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),

MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));

break;

}
case '/': /* Permissive Reddit autolinks */
{
    /* The mark spans the whole autolink text: either "/r/name" or "/u/name"
     * (3-character prefix) or "r/name" or "u/name" (2-character prefix).
     * In both spellings the 'r'/'u' letter is the character right before
     * the last slash of the prefix. */
    MD_REDDIT_SLASH_DETAIL det;
    SZ prefix_len = (CH(mark->beg) == '/') ? 3 : 2;
    SZ name_size = mark->end - mark->beg - prefix_len;

    det.name = ctx->text + mark->beg + prefix_len;
    /* NOTE(review): det.size is an unsigned char, so names longer than 255
     * characters are truncated here exactly as before; the link target
     * below uses the untruncated name_size. */
    det.size = name_size;
    det.type = (CH(mark->beg + prefix_len - 2) == 'r') ? MD_REDDIT_SUBREDDIT
                                                       : MD_REDDIT_USER;

    if(ctx->r.flags & MD_FLAG_REDDIT_SLASHES_AS_LINKS) {
        /* Render as an ordinary link to
         *   "https://www.reddit.com/" (23 chars) + 'r' or 'u' + '/' + name,
         * i.e. 25 + name_size characters. The previous code allocated and
         * reported only 24 + size characters, overflowing the buffer by one
         * element and cutting the last character off the href.
         *
         * The URL is built in the context's scratch buffer, the same way
         * the permissive e-mail/WWW autolink case builds its destination,
         * instead of an unchecked malloc()/free() pair.
         * NOTE(review): assumes MD_TEMP_BUFFER handles allocation failure
         * itself, as at its other call site in this function -- confirm. */
        MD_SPAN_A_DETAIL linkDet;
        SZ link_size = 25 + name_size;
        CHAR* link;

        MD_TEMP_BUFFER(link_size * sizeof(CHAR));
        link = ctx->buffer;

        memcpy(link, _T("https://www.reddit.com/"), 23 * sizeof(CHAR));
        link[23] = (det.type == MD_REDDIT_SUBREDDIT) ? _T('r') : _T('u');
        link[24] = _T('/');
        memcpy(link + 25, det.name, name_size * sizeof(CHAR));

        linkDet.href.text = link;
        linkDet.href.size = link_size;
        linkDet.href.substr_offsets = NULL;
        linkDet.href.substr_types = NULL;
        linkDet.title.text = NULL;
        linkDet.title.size = 0;
        linkDet.title.substr_offsets = NULL;
        linkDet.title.substr_types = NULL;

        MD_ENTER_SPAN(MD_SPAN_A, &linkDet);
        MD_TEXT(text_type, STR(mark->beg), mark->end - mark->beg);
        MD_LEAVE_SPAN(MD_SPAN_A, &linkDet);
    } else {
        /* Hand the autolink to the renderer as a dedicated span type. */
        MD_ENTER_SPAN(MD_REDDIT_SLASH_LINK, &det);
        MD_TEXT(text_type, STR(mark->beg), mark->end - mark->beg);
        MD_LEAVE_SPAN(MD_REDDIT_SLASH_LINK, &det);
    }
    break;
}
case '&': /* Entity. */
MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
break;
Expand Down
19 changes: 15 additions & 4 deletions md4c/md4c.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ typedef enum MD_SPANTYPE {
/* <del>...</del>
* Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
*/
MD_SPAN_DEL
MD_SPAN_DEL,
MD_REDDIT_SLASH_LINK
} MD_SPANTYPE;

/* Text is the actual textual contents of span. */
Expand Down Expand Up @@ -172,8 +173,10 @@ typedef enum MD_TEXTTYPE {
* The text contains verbatim '\n' for the new lines. */
MD_TEXT_HTML
} MD_TEXTTYPE;


/* Kind of target a Reddit slash autolink refers to. */
typedef enum MD_REDDIT_SLASH_TYPE {
    MD_REDDIT_USER = 0,     /* u/name or /u/name */
    MD_REDDIT_SUBREDDIT     /* r/name or /r/name */
} MD_REDDIT_SLASH_TYPE;
/* Alignment enumeration. */
typedef enum MD_ALIGN {
MD_ALIGN_DEFAULT = 0, /* When unspecified. */
Expand Down Expand Up @@ -245,7 +248,12 @@ typedef struct MD_SPAN_A_DETAIL {
MD_ATTRIBUTE href;
MD_ATTRIBUTE title;
} MD_SPAN_A_DETAIL;

/* Detailed info for MD_REDDIT_SLASH_LINK. */
typedef struct MD_REDDIT_SLASH_DETAIL
{
MD_REDDIT_SLASH_TYPE type; /* Whether the link targets a user or a subreddit. */
unsigned char size; /* Count of characters in name. NOTE(review): being an unsigned char, this cannot represent names longer than 255 characters; the parser assigns a wider difference to it, which would be truncated -- consider a wider type. */
MD_CHAR * name; /* The name after the "u/" or "r/" prefix. The parser points this into the input text, so it is presumably not zero-terminated; use size. */
}MD_REDDIT_SLASH_DETAIL;
/* Detailed info for MD_SPAN_IMG. */
typedef struct MD_SPAN_IMG_DETAIL {
MD_ATTRIBUTE src;
Expand All @@ -271,6 +279,8 @@ typedef struct MD_SPAN_IMG_DETAIL {

#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
#define MD_FLAG_REDDITSLASHDETECTION 0x8000 /* Enable Reddit autolinks */
#define MD_FLAG_REDDIT_SLASHES_AS_LINKS 0x4000 //Instead of making Reddit links into special spans, make them into web links
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the logic related to MD_FLAG_REDDIT_SLASHES_AS_LINKS has nothing to do with parsing, only with output rendering, and therefore it would IMHO be better to place the related logic in the md2html utility. Take a look at how it treats e.g. MD_RENDER_FLAG_VERBATIM_ENTITIES.

Or is there something that I have overlooked?


/* Convenient sets of flags corresponding to well-known Markdown dialects.
* Note we may only support subset of features of the referred dialect.
Expand All @@ -279,6 +289,7 @@ typedef struct MD_SPAN_IMG_DETAIL {
*/
#define MD_DIALECT_COMMONMARK 0
#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH)
#define MD_DIALECT_REDDITPOST (MD_FLAG_PERMISSIVEATXHEADERS | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_REDDITSLASHDETECTION | MD_FLAG_STRIKETHROUGH)

/* Renderer structure.
*/
Expand Down