Skip to content

Commit 18c11cc

Browse files
committed
Add support for special characters in \href
Resolves issue #9.
1 parent c199867 commit 18c11cc

File tree

4 files changed

+130
-1
lines changed

4 files changed

+130
-1
lines changed

library/PhpLatex/Node.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ class PhpLatex_Node
77
{
88
protected $_type;
99
protected $_props;
10+
11+
/** @var PhpLatex_Node[] */
1012
protected $_children = array();
1113

1214
/**
@@ -65,7 +67,7 @@ public function getChild($index)
6567
}
6668

6769
/**
68-
* @return array
70+
* @return PhpLatex_Node[]
6971
*/
7072
public function getChildren()
7173
{

library/PhpLatex/Parser.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,11 @@ protected function _parseControl($token, $mode, $environ = null) // {{{
581581
// parse arguments
582582
$numArgs = isset($spec['numArgs']) ? intval($spec['numArgs']) : 0;
583583

584+
// handle special cases for \url and \href commands
585+
if ($value === '\\href' || $value === '\\url') {
586+
$nodeArgs[] = $this->_parseUrl();
587+
}
588+
584589
while (count($nodeArgs) < $numArgs) {
585590
if (false === ($arg = $this->_parseArg($nodeMode, $environ, $parseArgs))) {
586591
// no argument found, create an artificial one
@@ -1047,6 +1052,64 @@ protected function _parseLeftRight($token, $mode, $environs)
10471052
return $node;
10481053
}
10491054

1055+
/**
 * Parse the URL argument of \href and \url commands.
 *
 * The argument is consumed token by token rather than parsed as regular
 * LaTeX content, so its tokens are copied into the URL text as they are.
 * Curly brackets nested inside the argument are kept as part of the URL.
 *
 * @return PhpLatex_Node Group node containing a single text node with the
 *                       URL, or an empty group node if no '{' follows
 */
protected function _parseUrl()
{
    $buffer = '';

    // spaces and comments may precede the opening '{'
    $this->_skipSpacesAndComments();

    $group = $this->_createNode(self::TYPE_GROUP, self::MODE_TEXT);

    // without an opening bracket there is no argument to read
    $token = $this->_peek();
    if (!($token['type'] === PhpLatex_Lexer::TYPE_SPECIAL && $token['value'] === '{')) {
        return $group;
    }
    $this->_next();

    // depth of curly brackets opened inside the argument itself
    $depth = 0;
    while ($token = $this->_next()) {
        $isSpecial = $token['type'] === PhpLatex_Lexer::TYPE_SPECIAL;

        if ($isSpecial && $token['value'] === '}') {
            if ($depth === 0) {
                // this bracket closes the url argument, not a nested group
                break;
            }
            --$depth;
        } elseif ($isSpecial && $token['value'] === '{') {
            ++$depth;
        }

        if ($token['type'] === PhpLatex_Lexer::TYPE_CSYMBOL) {
            // unescape control symbols by dropping their first character,
            // in the urls backslash is escaped simply as '\\'
            $buffer .= substr($token['value'], 1);
            continue;
        }

        // prefer the raw form of the token whenever it is available
        $buffer .= isset($token['raw']) ? $token['raw'] : $token['value'];
    }

    $text = $this->_createNode(self::TYPE_TEXT, self::MODE_TEXT);
    // surrounding whitespace is trimmed first, then LF characters are dropped
    $text->value = str_replace("\n", '', trim($buffer));
    $group->addChild($text);

    return $group;
}
1112+
10501113
/**
10511114
* @param array $token
10521115
* @return bool

library/PhpLatex/Renderer/Abstract.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,32 @@ public static function toLatex($node) // {{{
5555
}
5656
return $value;
5757
}
58+
59+
if ($node->value === '\\href' || $node->value === '\\url') {
60+
foreach ($node->getChildren() as $index => $child) {
61+
if ($index === 0) {
62+
$url = $child->getChild(0);
63+
if ($url) {
64+
// The special characters # and % do not need to be escaped in any way (unless
65+
// the command is used in the argument of another command).
66+
// https://texdoc.org/serve/hyperref/0
67+
68+
// Only control words (i.e. starting with a backslash) cause miscellaneous errors, among others:
69+
// - You've closed more groups than you opened.
70+
// - TeX capacity exceeded, sorry
71+
// - Paragraph ended before \hyper@n@rmalise was complete.
72+
// all other characters, normally treated as special, lose their meanings inside \href and \url
73+
$value .= '{' . str_replace('\\', '\\\\', (string) $url->value) . '}';
74+
} else {
75+
$value .= '{}';
76+
}
77+
} else {
78+
$value .= self::toLatex($child);
79+
}
80+
}
81+
return $value;
82+
}
83+
5884
if ($node->symbol || $node->hasChildren()) {
5985
return $value . self::toLatex($node->getChildren());
6086
}

tests/PhpLatex/Parser/HrefTest.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
3+
/**
 * Tests parsing of the URL argument of \href and \url commands, checked by
 * rendering the parsed tree back to LaTeX.
 */
class PhpLatex_Parser_HrefTest extends PHPUnit_Framework_TestCase
{
    /**
     * @var PhpLatex_Parser
     */
    protected $parser;

    /**
     * Create a fresh parser for every test.
     */
    protected function setUp()
    {
        parent::setUp();
        $this->parser = new PhpLatex_Parser();
    }

    /**
     * An URL containing percent-encoded bytes and an ampersand must be
     * rendered back unchanged.
     *
     * @see https://github.com/xemlock/php-latex/issues/9
     */
    public function testParse()
    {
        $input = '\href{https://ja.wikipedia.org/wiki/%E9%9B%BB%E5%AD%90?utm_source=test&utm_medium=email}{electron}';
        $tree = $this->parser->parse($input);
        $this->assertSame($input, PhpLatex_Renderer_Abstract::toLatex($tree));
    }

    /**
     * Characters such as ~, {, }, $, % and # inside \url must be rendered
     * back unchanged.
     */
    public function testSpecialSymbols()
    {
        $input = '\url{https://test.com/~user{a}$2%20x#test}';
        $tree = $this->parser->parse($input);
        $this->assertSame($input, PhpLatex_Renderer_Abstract::toLatex($tree));
    }

    /**
     * Backslashes inside the URL are expected to be doubled in the
     * rendered output.
     */
    public function testBackslash()
    {
        $input = '\href{run:C:\path\to\script.bat}{File}';
        $tree = $this->parser->parse($input);
        $this->assertSame('\href{run:C:\\\\path\\\\to\\\\script.bat}{File}', PhpLatex_Renderer_Abstract::toLatex($tree));
    }
}

0 commit comments

Comments
 (0)