Skip to content

Commit 9eda826

Browse files
Merge pull request #164 from mundschenk-at/unicode-refresh
Use "new" PHP unicode features
2 parents 6b94d2a + 723bbee commit 9eda826

File tree

8 files changed

+79
-88
lines changed

8 files changed

+79
-88
lines changed

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
"phpunit --testsuite PHP-Typography"
6969
],
7070
"coverage": [
71-
"phpunit --testsuite PHP-Typography --coverage-html tests/coverage"
71+
"XDEBUG_MODE=coverage phpunit --testsuite PHP-Typography --coverage-html tests/coverage"
7272
],
7373
"check": [
7474
"@phpcs",

src/bin/class-pattern-converter.php

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
namespace PHP_Typography\Bin;
2929

30-
use PHP_Typography\Strings;
30+
use PHP_Typography\U;
3131

3232
/**
3333
* Convert LaTeX hyphenation pattern files to JSON.
@@ -95,10 +95,7 @@ public function __construct( $urls, $language ) {
9595
'\p{Thai}',
9696

9797
// Very special characters.
98-
'[' . Strings::uchr(
99-
8204, // ZERO WIDTH NON-JOINER.
100-
8205 // ZERO WIDTH JOINER.
101-
) . ']',
98+
'[' . U::ZERO_WIDTH_JOINER . U::ZERO_WIDTH_NON_JOINER . ']',
10299
]
103100
)
104101
. ')';
@@ -124,7 +121,7 @@ protected function get_segment( $pattern ) {
124121
* @return string
125122
*/
126123
protected function get_sequence( $pattern ) {
127-
$characters = Strings::mb_str_split( \str_replace( '.', '_', $pattern ) );
124+
$characters = \mb_str_split( \str_replace( '.', '_', $pattern ) );
128125
$result = [];
129126

130127
foreach ( $characters as $index => $chr ) {

src/class-strings.php

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,9 @@ abstract class Strings {
7070
* @var array{
7171
* 'UTF-8' : String_Functions,
7272
* 'ASCII' : String_Functions,
73-
* 0 : array{}
7473
* }
7574
*/
76-
const STRING_FUNCTIONS = [
75+
private const STRING_FUNCTIONS = [
7776
'UTF-8' => [
7877
'strlen' => 'mb_strlen',
7978
'str_split' => 'mb_str_split',
@@ -90,7 +89,6 @@ abstract class Strings {
9089
'substr' => 'substr',
9190
'u' => '',
9291
],
93-
false => [],
9492
];
9593

9694
/**
@@ -100,7 +98,13 @@ abstract class Strings {
10098
* @return String_Functions|array{}
10199
*/
102100
public static function functions( $str ) {
103-
return self::STRING_FUNCTIONS[ \mb_detect_encoding( $str, self::ENCODINGS, true ) ]; // TODO: benchmark mb_check_encoding loop.
101+
foreach ( self::ENCODINGS as $encoding ) {
102+
if ( \mb_check_encoding( $str, $encoding ) ) {
103+
return self::STRING_FUNCTIONS[ $encoding ];
104+
}
105+
}
106+
107+
return [];
104108
}
105109

106110
/**
@@ -130,6 +134,8 @@ public static function mb_str_split( $string, $split_length = 1 ) {
130134
/**
131135
* Converts decimal value to unicode character.
132136
*
137+
* @deprecated 6.7.0
138+
*
133139
* @param int|string|array<string|int> $codes Decimal value(s) corresponding to unicode character(s).
134140
*
135141
* @return string Unicode character(s).

src/class-u.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,7 @@ interface U {
7474
const RIGHT_CORNER_BRACKET = "\xe3\x80\x8d";
7575
const LEFT_WHITE_CORNER_BRACKET = "\xe3\x80\x8e";
7676
const RIGHT_WHITE_CORNER_BRACKET = "\xe3\x80\x8f";
77+
const ZERO_WIDTH_JOINER = "\u{200d}";
78+
const ZERO_WIDTH_NON_JOINER = "\u{200c}";
7779

7880
}

src/fixes/node-fixes/class-style-caps-fix.php

Lines changed: 19 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -33,66 +33,41 @@
3333
/**
3434
* Wraps words of all caps (may include numbers) in <span class="caps"> if enabled.
3535
*
36-
* Call before style_numbers().Only call if you are certain that no html tags have been
37-
* injected containing capital letters.
36+
* Call before style_numbers(). Only call if you are certain that no html tags have
37+
* been injected containing capital letters.
3838
*
3939
* @author Peter Putzer <github@mundschenk.at>
4040
*
4141
* @since 5.0.0
4242
*/
4343
class Style_Caps_Fix extends Simple_Style_Fix {
44-
/*
45-
// \p{Lu} equals upper case letters and should match non english characters; since PHP 4.4.0 and 5.1.0
46-
// for more info, see http://www.regextester.com/pregsyntax.html#regexp.reference.unicode
47-
$this->components[ Settings::STYLE_CAPS ] = '
48-
(?<![\w\-_'.U::ZERO_WIDTH_SPACE.U::SOFT_HYPHEN.'])
49-
# negative lookbehind assertion
50-
(
51-
(?: # CASE 1: " 9A "
52-
[0-9]+ # starts with at least one number
53-
\p{Lu} # must contain at least one capital letter
54-
(?:\p{Lu}|[0-9]|\-|_|'.U::ZERO_WIDTH_SPACE.'|'.U::SOFT_HYPHEN.')*
55-
# may be followed by any number of numbers capital letters, hyphens, underscores, zero width spaces, or soft hyphens
56-
)
57-
|
58-
(?: # CASE 2: " A9 "
59-
\p{Lu} # starts with capital letter
60-
(?:\p{Lu}|[0-9]) # must be followed a number or capital letter
61-
(?:\p{Lu}|[0-9]|\-|_|'.U::ZERO_WIDTH_SPACE.'|'.U::SOFT_HYPHEN.')*
62-
# may be followed by any number of numbers capital letters, hyphens, underscores, zero width spaces, or soft hyphens
6344

64-
)
65-
)
66-
(?![\w\-_'.U::ZERO_WIDTH_SPACE.U::SOFT_HYPHEN.'])
67-
# negative lookahead assertion
68-
'; // required modifiers: x (multiline pattern) u (utf8)
69-
*/
70-
71-
// Servers with PCRE compiled without "--enable-unicode-properties" fail at \p{Lu} by returning an empty string (this leaving the screen void of text
72-
// thus are testing this alternative.
45+
// PCRE needs to be compiled with "--enable-unicode-properties", but we already depend on that elsewhere.
7346
const REGEX = '/
74-
(?<![\w' . self::COMBINING_MARKS . ']) # negative lookbehind assertion
47+
(?<![\w' . self::COMBINING_MARKS . ']) # negative lookbehind assertion
7548
(
76-
(?: # CASE 1: " 9A "
77-
[0-9]+ # starts with at least one number
49+
(?: # CASE 1: " 9A "
50+
[0-9]+ # starts with at least one number
7851
(?:[' . self::COMBINING_MARKS . '])*
79-
# may contain hyphens, underscores, zero width spaces, or soft hyphens,
80-
[A-ZÀ-ÖØ-Ý] # but must contain at least one capital letter
81-
(?:[A-ZÀ-ÖØ-Ý]|[0-9]|[' . self::COMBINING_MARKS . '])*
82-
# may be followed by any number of numbers capital letters, hyphens, underscores, zero width spaces, or soft hyphens
52+
# may contain hyphens, underscores, zero width spaces, or soft hyphens,
53+
\p{Lu} # but must contain at least one capital letter
54+
(?:\p{Lu}|[0-9]|[' . self::COMBINING_MARKS . '])*
55+
# may be followed by any number of numbers capital letters, hyphens,
56+
# underscores, zero width spaces, or soft hyphens
8357
)
8458
|
85-
(?: # CASE 2: " A9 "
86-
[A-ZÀ-ÖØ-Ý] # starts with capital letter
87-
(?:[A-ZÀ-ÖØ-Ý]|[0-9]) # must be followed a number or capital letter
88-
(?:[A-ZÀ-ÖØ-Ý]|[0-9]|[' . self::COMBINING_MARKS . '])*
89-
# may be followed by any number of numbers capital letters, hyphens, underscores, zero width spaces, or soft hyphens
59+
(?: # CASE 2: " A9 "
60+
\p{Lu} # starts with capital letter
61+
(?:\p{Lu}|[0-9]) # must be followed a number or capital letter
62+
(?:\p{Lu}|[0-9]|[' . self::COMBINING_MARKS . '])*
63+
# may be followed by any number of numbers capital letters, hyphens,
64+
# underscores, zero width spaces, or soft hyphens
9065
)
9166
)
92-
(?![\w' . self::COMBINING_MARKS . ']) # negative lookahead assertion
67+
(?![\w' . self::COMBINING_MARKS . ']) # negative lookahead assertion
9368
/Sxu';
9469

95-
const COMBINING_MARKS = '\-_' . U::HYPHEN . U::SOFT_HYPHEN . U::ZERO_WIDTH_SPACE; // Needs to be part of character class.
70+
private const COMBINING_MARKS = '\-_' . U::HYPHEN . U::SOFT_HYPHEN . U::ZERO_WIDTH_SPACE; // Needs to be part of character class.
9671

9772
/**
9873
* Creates a new node fix with a class.

tests/class-settings-test.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
namespace PHP_Typography\Tests;
2626

2727
use PHP_Typography\Settings;
28-
use PHP_Typography\Strings;
2928
use PHP_Typography\U;
3029

3130
use PHP_Typography\Settings\Dashes;
@@ -44,7 +43,6 @@
4443
* @uses PHP_Typography\Settings\Simple_Quotes
4544
* @uses PHP_Typography\Settings\Dash_Style::get_styled_dashes
4645
* @uses PHP_Typography\Settings\Quote_Style::get_styled_quotes
47-
* @uses PHP_Typography\Strings::uchr
4846
* @uses PHP_Typography\DOM::inappropriate_tags
4947
*/
5048
class Settings_Test extends Testcase {

tests/class-strings-test.php

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
/**
33
* This file is part of PHP-Typography.
44
*
5-
* Copyright 2015-2020 Peter Putzer.
5+
* Copyright 2015-2022 Peter Putzer.
66
*
77
* This program is free software; you can redistribute it and/or modify
88
* it under the terms of the GNU General Public License as published by
@@ -74,6 +74,18 @@ public function test_functions() {
7474
$this->assert_string_functions( $func_utf8 );
7575
}
7676

77+
/**
78+
* Test ::functions with a string in an unsupported encoding (expects an empty array).
79+
*
80+
* @covers ::functions
81+
*/
82+
public function test_functions_invalid_encoding() {
83+
$func = Strings::functions( \mb_convert_encoding( 'Ungültiges Encoding', 'ISO-8859-2' ) );
84+
85+
$this->assertTrue( \is_array( $func ) );
86+
$this->assertCount( 0, $func );
87+
}
88+
7789
/**
7890
* Provide data for testing uchr.
7991
*

tests/class-testcase.php

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
namespace PHP_Typography\Tests;
2626

2727
use PHP_Typography\Strings;
28+
use PHP_Typography\U;
2829
use PHP_Typography\Text_Parser\Token;
2930

3031
/**
@@ -167,78 +168,78 @@ protected function assert_tokens_not_same( $expected_value, array $actual_tokens
167168
protected function assert_smart_quotes_style( $style, $open, $close ) {
168169
switch ( $style ) {
169170
case 'doubleCurled':
170-
$this->assertSame( Strings::uchr( 8220 ), $open, "Opening quote $open did not match quote style $style." );
171-
$this->assertSame( Strings::uchr( 8221 ), $close, "Closeing quote $close did not match quote style $style." );
171+
$this->assertSame( U::DOUBLE_QUOTE_OPEN, $open, "Opening quote $open did not match quote style $style." );
172+
$this->assertSame( U::DOUBLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
172173
break;
173174

174175
case 'doubleCurledReversed':
175-
$this->assertSame( Strings::uchr( 8221 ), $open, "Opening quote $open did not match quote style $style." );
176-
$this->assertSame( Strings::uchr( 8221 ), $close, "Closeing quote $close did not match quote style $style." );
176+
$this->assertSame( U::DOUBLE_QUOTE_CLOSE, $open, "Opening quote $open did not match quote style $style." );
177+
$this->assertSame( U::DOUBLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
177178
break;
178179

179180
case 'doubleLow9':
180-
$this->assertSame( Strings::uchr( 8222 ), $open, "Opening quote $open did not match quote style $style." );
181-
$this->assertSame( Strings::uchr( 8221 ), $close, "Closeing quote $close did not match quote style $style." );
181+
$this->assertSame( U::DOUBLE_LOW_9_QUOTE, $open, "Opening quote $open did not match quote style $style." );
182+
$this->assertSame( U::DOUBLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
182183
break;
183184

184185
case 'doubleLow9Reversed':
185-
$this->assertSame( Strings::uchr( 8222 ), $open, "Opening quote $open did not match quote style $style." );
186-
$this->assertSame( Strings::uchr( 8220 ), $close, "Closeing quote $close did not match quote style $style." );
186+
$this->assertSame( U::DOUBLE_LOW_9_QUOTE, $open, "Opening quote $open did not match quote style $style." );
187+
$this->assertSame( U::DOUBLE_QUOTE_OPEN, $close, "Closeing quote $close did not match quote style $style." );
187188
break;
188189

189190
case 'singleCurled':
190-
$this->assertSame( Strings::uchr( 8216 ), $open, "Opening quote $open did not match quote style $style." );
191-
$this->assertSame( Strings::uchr( 8217 ), $close, "Closeing quote $close did not match quote style $style." );
191+
$this->assertSame( U::SINGLE_QUOTE_OPEN, $open, "Opening quote $open did not match quote style $style." );
192+
$this->assertSame( U::SINGLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
192193
break;
193194

194195
case 'singleCurledReversed':
195-
$this->assertSame( Strings::uchr( 8217 ), $open, "Opening quote $open did not match quote style $style." );
196-
$this->assertSame( Strings::uchr( 8217 ), $close, "Closeing quote $close did not match quote style $style." );
196+
$this->assertSame( U::SINGLE_QUOTE_CLOSE, $open, "Opening quote $open did not match quote style $style." );
197+
$this->assertSame( U::SINGLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
197198
break;
198199

199200
case 'singleLow9':
200-
$this->assertSame( Strings::uchr( 8218 ), $open, "Opening quote $open did not match quote style $style." );
201-
$this->assertSame( Strings::uchr( 8217 ), $close, "Closeing quote $close did not match quote style $style." );
201+
$this->assertSame( U::SINGLE_LOW_9_QUOTE, $open, "Opening quote $open did not match quote style $style." );
202+
$this->assertSame( U::SINGLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
202203
break;
203204

204205
case 'singleLow9Reversed':
205-
$this->assertSame( Strings::uchr( 8218 ), $open, "Opening quote $open did not match quote style $style." );
206-
$this->assertSame( Strings::uchr( 8216 ), $close, "Closeing quote $close did not match quote style $style." );
206+
$this->assertSame( U::SINGLE_LOW_9_QUOTE, $open, "Opening quote $open did not match quote style $style." );
207+
$this->assertSame( U::SINGLE_QUOTE_OPEN, $close, "Closeing quote $close did not match quote style $style." );
207208
break;
208209

209210
case 'doubleGuillemetsFrench':
210-
$this->assertSame( Strings::uchr( 171, 8239 ), $open, "Opening quote $open did not match quote style $style." );
211-
$this->assertSame( Strings::uchr( 8239, 187 ), $close, "Closeing quote $close did not match quote style $style." );
211+
$this->assertSame( U::GUILLEMET_OPEN . U::NO_BREAK_NARROW_SPACE, $open, "Opening quote $open did not match quote style $style." );
212+
$this->assertSame( U::NO_BREAK_NARROW_SPACE . U::GUILLEMET_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
212213
break;
213214

214215
case 'doubleGuillemets':
215-
$this->assertSame( Strings::uchr( 171 ), $open, "Opening quote $open did not match quote style $style." );
216-
$this->assertSame( Strings::uchr( 187 ), $close, "Closeing quote $close did not match quote style $style." );
216+
$this->assertSame( U::GUILLEMET_OPEN, $open, "Opening quote $open did not match quote style $style." );
217+
$this->assertSame( U::GUILLEMET_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
217218
break;
218219

219220
case 'doubleGuillemetsReversed':
220-
$this->assertSame( Strings::uchr( 187 ), $open, "Opening quote $open did not match quote style $style." );
221-
$this->assertSame( Strings::uchr( 171 ), $close, "Closeing quote $close did not match quote style $style." );
221+
$this->assertSame( U::GUILLEMET_CLOSE, $open, "Opening quote $open did not match quote style $style." );
222+
$this->assertSame( U::GUILLEMET_OPEN, $close, "Closeing quote $close did not match quote style $style." );
222223
break;
223224

224225
case 'singleGuillemets':
225-
$this->assertSame( Strings::uchr( 8249 ), $open, "Opening quote $open did not match quote style $style." );
226-
$this->assertSame( Strings::uchr( 8250 ), $close, "Closeing quote $close did not match quote style $style." );
226+
$this->assertSame( U::SINGLE_ANGLE_QUOTE_OPEN, $open, "Opening quote $open did not match quote style $style." );
227+
$this->assertSame( U::SINGLE_ANGLE_QUOTE_CLOSE, $close, "Closeing quote $close did not match quote style $style." );
227228
break;
228229

229230
case 'singleGuillemetsReversed':
230-
$this->assertSame( Strings::uchr( 8250 ), $open, "Opening quote $open did not match quote style $style." );
231-
$this->assertSame( Strings::uchr( 8249 ), $close, "Closeing quote $close did not match quote style $style." );
231+
$this->assertSame( U::SINGLE_ANGLE_QUOTE_CLOSE, $open, "Opening quote $open did not match quote style $style." );
232+
$this->assertSame( U::SINGLE_ANGLE_QUOTE_OPEN, $close, "Closeing quote $close did not match quote style $style." );
232233
break;
233234

234235
case 'cornerBrackets':
235-
$this->assertSame( Strings::uchr( 12300 ), $open, "Opening quote $open did not match quote style $style." );
236-
$this->assertSame( Strings::uchr( 12301 ), $close, "Closeing quote $close did not match quote style $style." );
236+
$this->assertSame( U::LEFT_CORNER_BRACKET, $open, "Opening quote $open did not match quote style $style." );
237+
$this->assertSame( U::RIGHT_CORNER_BRACKET, $close, "Closeing quote $close did not match quote style $style." );
237238
break;
238239

239240
case 'whiteCornerBracket':
240-
$this->assertSame( Strings::uchr( 12302 ), $open, "Opening quote $open did not match quote style $style." );
241-
$this->assertSame( Strings::uchr( 12303 ), $close, "Closeing quote $close did not match quote style $style." );
241+
$this->assertSame( U::LEFT_WHITE_CORNER_BRACKET, $open, "Opening quote $open did not match quote style $style." );
242+
$this->assertSame( U::RIGHT_WHITE_CORNER_BRACKET, $close, "Closeing quote $close did not match quote style $style." );
242243
break;
243244

244245
default:

0 commit comments

Comments
 (0)