Skip to content

Commit e951d54

Browse files
authored
Merge pull request #59 from PHPCSStandards/feature/new-orthography-class
New Utils\Orthography class
2 parents 179baaf + 1906122 commit e951d54

File tree

3 files changed

+529
-0
lines changed

3 files changed

+529
-0
lines changed

PHPCSUtils/Utils/Orthography.php

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
<?php
2+
/**
3+
* PHPCSUtils, utility functions and classes for PHP_CodeSniffer sniff developers.
4+
*
5+
* @package PHPCSUtils
6+
* @copyright 2019 PHPCSUtils Contributors
7+
* @license https://opensource.org/licenses/LGPL-3.0 LGPL3
8+
* @link https://github.com/PHPCSStandards/PHPCSUtils
9+
*/
10+
11+
namespace PHPCSUtils\Utils;
12+
13+
use PHPCSUtils\BackCompat\Helper;
14+
15+
/**
16+
* Utility functions for checking the orthography of arbitrary text strings.
17+
*
18+
* > An orthography is a set of conventions for writing a language. It includes norms of spelling,
19+
* > hyphenation, capitalization, word breaks, emphasis, and punctuation.
20+
* > Source: https://en.wikipedia.org/wiki/Orthography
21+
*
22+
* @since 1.0.0
23+
*/
24+
class Orthography
{

    /**
     * Characters which are considered terminal points for a sentence.
     *
     * @link https://www.thepunctuationguide.com/terminal-points.html
     *
     * @since 1.0.0
     *
     * @var string
     */
    const TERMINAL_POINTS = '.?!';

    /**
     * Check if the first character of an arbitrary text string is a capital letter.
     *
     * Leading whitespace is ignored. Letter characters which do not have a concept
     * of lower/uppercase (Unicode category "Lo") are accepted as correctly capitalized,
     * as are title case letters (Unicode category "Lt").
     *
     * @since 1.0.0
     *
     * @param string $string The text string to examine.
     *                       This can be the contents of a text string token,
     *                       but also, for instance, a comment text.
     *                       Potential text delimiter quotes should be stripped
     *                       off a text string before passing it to this method.
     *
     * @return bool True when the first character is a capital letter or a letter
     *              which doesn't have a concept of capitalization.
     *              False otherwise, including for non-letter characters and
     *              for empty/whitespace-only strings.
     */
    public static function isFirstCharCapitalized($string)
    {
        $string = \ltrim($string);

        // Lu = uppercase letter, Lt = title case letter, Lo = letter without case.
        return (\preg_match('`^[\p{Lu}\p{Lt}\p{Lo}]`u', $string) === 1);
    }

    /**
     * Check if the first character of an arbitrary text string is a lowercase letter.
     *
     * Leading whitespace is ignored.
     *
     * @since 1.0.0
     *
     * @param string $string The text string to examine.
     *                       This can be the contents of a text string token,
     *                       but also, for instance, a comment text.
     *                       Potential text delimiter quotes should be stripped
     *                       off a text string before passing it to this method.
     *
     * @return bool True when the first character is a lowercase letter.
     *              False otherwise, including for letters which don't have a concept of
     *              capitalization, for non-letter characters and for
     *              empty/whitespace-only strings.
     */
    public static function isFirstCharLowercase($string)
    {
        $string = \ltrim($string);

        // Ll = lowercase letter.
        return (\preg_match('`^\p{Ll}`u', $string) === 1);
    }

    /**
     * Check if the last character of an arbitrary text string is a valid punctuation character.
     *
     * Trailing whitespace is ignored. A string which is empty after trimming has no
     * last character and therefore never ends on valid punctuation.
     *
     * @since 1.0.0
     *
     * @param string $string       The text string to examine.
     *                             This can be the contents of a text string token,
     *                             but also, for instance, a comment text.
     *                             Potential text delimiter quotes should be stripped
     *                             off a text string before passing it to this method.
     * @param string $allowedChars Characters which are considered valid punctuation
     *                             to end the text string.
     *                             Defaults to '.?!', i.e. a full stop, question mark
     *                             or exclamation mark.
     *
     * @return bool True when the last non-whitespace character is one of the allowed characters.
     *              False otherwise, including for empty/whitespace-only strings.
     */
    public static function isLastCharPunctuation($string, $allowedChars = self::TERMINAL_POINTS)
    {
        $string = \rtrim($string);
        if ($string === '') {
            // Nothing to examine. Bail out before an empty needle reaches (iconv_)strpos(),
            // the result of which differs between PHP versions.
            return false;
        }

        /*
         * The encoding is looked up on every call rather than cached in a static variable,
         * so a configuration value which changes during the process is honoured.
         *
         * NOTE(review): the fallback assumes the config lookup can yield a non-string/empty
         * value when no encoding has been configured - confirm against Helper::getConfigData().
         */
        $encoding = Helper::getConfigData('encoding');
        if (\is_string($encoding) === false || $encoding === '') {
            $encoding = 'utf-8';
        }

        if (\function_exists('iconv_substr') === true) {
            $lastChar = \iconv_substr($string, -1, 1, $encoding);
        } else {
            // Byte-based fallback when the iconv extension is not available.
            $lastChar = \substr($string, -1);
        }

        if ($lastChar === false || $lastChar === '') {
            // The last character could not be determined (e.g. invalid byte sequence).
            return false;
        }

        if (\function_exists('iconv_strpos') === true) {
            return (\iconv_strpos($allowedChars, $lastChar, 0, $encoding) !== false);
        }

        return (\strpos($allowedChars, $lastChar) !== false);
    }
}

0 commit comments

Comments
 (0)