Skip to content

Commit 0b9aeea

Browse files
authored
Merge pull request #111 from stronk7/lang_string_sorted_please
Lang string sorted please
2 parents 3966e9f + 238d3f4 commit 0b9aeea

File tree

10 files changed

+737
-0
lines changed

10 files changed

+737
-0
lines changed
Lines changed: 335 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
<?php
2+
3+
// This file is part of Moodle - https://moodle.org/
4+
//
5+
// Moodle is free software: you can redistribute it and/or modify
6+
// it under the terms of the GNU General Public License as published by
7+
// the Free Software Foundation, either version 3 of the License, or
8+
// (at your option) any later version.
9+
//
10+
// Moodle is distributed in the hope that it will be useful,
11+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
// GNU General Public License for more details.
14+
//
15+
// You should have received a copy of the GNU General Public License
16+
// along with Moodle. If not, see <https://www.gnu.org/licenses/>.
17+
18+
/**
19+
* This sniff verifies that lang files are sorted alphabetically by string key.
20+
*
21+
* @copyright 2024 onwards Eloy Lafuente (stronk7) {@link https://stronk7.com}
22+
* @license https://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23+
*/
24+
25+
namespace MoodleHQ\MoodleCS\moodle\Sniffs\Files;
26+
27+
use MoodleHQ\MoodleCS\moodle\Util\MoodleUtil;
28+
use PHP_CodeSniffer\Files\File;
29+
use PHP_CodeSniffer\Sniffs\Sniff;
30+
31+
class LangFilesOrderingSniff implements Sniff
{
    /**
     * @var string|null The key of the previously processed string (null when there is none yet).
     *
     * We use this variable to compare the current string with the previous one and decide
     * if the current string is out of order. It is reset every time a new group begins.
     */
    protected ?string $previousString = null;

    /**
     * @var int Pointer to the token where we should stop fixing the file (defaults to a value past any token).
     *
     * When we find a comment that does not contain "// Deprecated since ", we stop fixing the file there.
     */
    protected int $stopFixingPtr = 999999999;

    /**
     * @var array An array whose keys are the groups, each one containing the known string keys of that
     *            group, with [start pointer, end pointer] pairs as values.
     *
     * We use this array to, accurately, know where to move every string on each fixing iteration.
     */
    protected array $strings = [];

    /**
     * Register the tokens this sniff listens to.
     *
     * Only the open tag is registered; starting from it, process() walks the whole file
     * looking for all the strings and comments within it.
     *
     * @return array
     */
    public function register(): array {
        return [T_OPEN_TAG];
    }

    /**
     * Walk a lang file reporting unexpected comments and variables, malformed, duplicated
     * and unordered strings. When the fixer is enabled, the unordered strings are moved.
     *
     * @param File $phpcsFile The file being processed.
     * @param int $stackPtr Pointer to the open tag that triggered the sniff.
     */
    public function process(File $phpcsFile, $stackPtr): void {
        // If the file is not a lang file, return.
        if (!MoodleUtil::isLangFile($phpcsFile)) {
            return;
        }

        // Only for Moodle 4.4dev (404) and up.
        // Make an exception for unit tests, so they are run always.
        if (!MoodleUtil::meetsMinimumMoodleVersion($phpcsFile, 404) && !MoodleUtil::isUnitTestRunning()) {
            return; // @codeCoverageIgnore
        }

        // Get the file tokens, for ease of use.
        $tokens = $phpcsFile->getTokens();

        // Ensure that we start from scratch on each file and pass.
        $this->previousString = null;
        $this->stopFixingPtr = 999999999;
        $this->strings = [];

        // Let's find the first variable and start the process.
        $currentPtr = $phpcsFile->findNext(T_VARIABLE, $stackPtr + 1);
        if ($currentPtr === false) {
            return; // No strings found, nothing to do.
        }

        // It's time to iterate over all the strings and comments till the end of the file.
        // We'll go accumulating all the strings by group, with their start and end pointers as values.
        $currentGroup = 'main'; // The default group to start with, we'll change it each time we find a new section.
        do {
            // Let's manage comments first (so we know if we are changing the current group).
            if ($tokens[$currentPtr]['code'] === T_COMMENT) {
                $markerPos = strpos($tokens[$currentPtr]['content'], '// Deprecated since ');

                // Correct comments are in new line and begin with "// Deprecated since ".
                if ($markerPos === 0 && $tokens[$currentPtr - 1]['content'] === "\n") {
                    $currentGroup = trim($tokens[$currentPtr]['content']);
                }

                // A comment without the marker is not a standard one: warn and stop fixing the file here.
                if ($markerPos === false) {
                    $phpcsFile->addWarning(
                        'Unexpected comment found. Auto-fixing will not work after this comment',
                        $currentPtr,
                        'UnexpectedComment'
                    );
                    if ($this->stopFixingPtr > $currentPtr) {
                        $this->stopFixingPtr = $currentPtr; // Update the stop fixing pointer.
                    }
                }

                continue; // We are done for comment tokens.
            }

            // Arrived here, all the tokens are variables, so we don't need to check for that.

            // If the name of the variable is not "$string", error.
            if ($tokens[$currentPtr]['content'] !== '$string') {
                $phpcsFile->addError(
                    'Variable "%s" is not expected in a lang file',
                    $currentPtr,
                    'UnexpectedVariable',
                    [$tokens[$currentPtr]['content']]
                );
                continue; // We are done for this token.
            }

            // Get the string key, if any. Compare against null explicitly: keys like "0" are
            // perfectly valid but falsy, and must not be confused with a missing key.
            $stringKey = $this->getStringKey($phpcsFile, $currentPtr);
            if ($stringKey === null) {
                continue; // Problems with this string key, skip it (has been already reported).
            }

            // Have found a valid $string[KEY], let's calculate the end and store it.
            $currentEnd = $this->getStringEnd($phpcsFile, $currentPtr);
            if ($currentEnd !== null) {
                if (!isset($this->strings[$currentGroup])) {
                    $this->strings[$currentGroup] = [];
                    $this->previousString = null; // Reset the previous string on new group.
                }

                // Check if the string already has been found earlier.
                if (isset($this->strings[$currentGroup][$stringKey])) {
                    $phpcsFile->addError('The string key "%s" is duplicated', $currentPtr, 'DuplicatedKey', [$stringKey]);
                    continue; // We are done for this string, won't report anything about it till fixed.
                }

                // We can safely add the string to the group, if we are before the last pointer to fix.
                if ($currentPtr < $this->stopFixingPtr) {
                    $this->strings[$currentGroup][$stringKey] = [$currentPtr, $currentEnd];
                }
            } else {
                // The string end is not as expected, report as error unless the next token
                // after the semicolon is a comment. In that case, we won't report it because
                // UnexpectedComment will take on it. Without any semicolon there is nothing
                // to delegate to, so the error is always reported.
                $semicolonPtr = $phpcsFile->findNext(T_SEMICOLON, $currentPtr + 1);
                $delegateToUnexpectedComment = $semicolonPtr !== false &&
                    $this->isFollowedByComment($tokens, $semicolonPtr);

                if (!$delegateToUnexpectedComment) {
                    $phpcsFile->addError(
                        'Unexpected string end, it should be a line feed after a semicolon',
                        $currentPtr,
                        'UnexpectedEnd'
                    );
                    continue; // We are done for this string, won't report anything about it till fixed.
                }
            }

            // Note: We only issue these warnings if there are previous strings to compare with,
            // and, obviously, if the string is out of order.
            if ($this->previousString !== null && strcmp($this->previousString, $stringKey) > 0) {
                // This is an unordered string. The warning is fixable only if we are
                // before the last pointer to fix.
                $phpcsFile->addWarning(
                    'The string key "%s" is not in the correct order, it should be before "%s"',
                    $currentPtr,
                    'IncorrectOrder',
                    [$stringKey, $this->previousString],
                    0,
                    $currentPtr < $this->stopFixingPtr
                );
            }

            // Feed $previousString with the current string key.
            $this->previousString = $stringKey;
        } while ($currentPtr = $phpcsFile->findNext([T_VARIABLE, T_COMMENT], $currentPtr + 1)); // Move to next.

        // If we are fixing the file, we need to sort the strings and move them to the correct position.
        if ($phpcsFile->fixer->enabled) {
            $this->sortStringsAndFix($phpcsFile);
        }
    }

    /**
     * Given a lang file, fix all the sorting issues found.
     *
     * This is really similar to the insertion-sort algorithm, but with
     * a few optimisations to avoid unnecessary iterations. Should be
     * efficient enough against lists that are expected to be not
     * too long and already mostly sorted.
     *
     * @param File $phpcsFile The lang file being processed.
     */
    protected function sortStringsAndFix(File $phpcsFile): void {
        // Because of hard restrictions in CodeSniffer fixer (we cannot apply more than one change
        // to the same token in the same pass), we need to accumulate all the changes and apply them
        // at the end of the process. So we are going to build a big changeset to be applied all together.
        // Keys will be the token index and values an array of operations (DELETE, INSERT) with their content.

        // Get the file tokens, for ease of use.
        $tokens = $phpcsFile->getTokens();

        // We are going to perform the sorting within each detected group/section.
        foreach ($this->strings as $group => $strings) {
            $changeSet = []; // The changeset to be applied at the end of the iteration.

            // Let's compare the keys in the array of strings with the sorted version of it.
            $sorted = $unSorted = array_keys($strings);
            sort($sorted, SORT_STRING);
            $count = count($sorted);
            for ($i = 0; $i < $count; $i++) {
                $sortedKey = $sorted[$i];
                $stringsKey = $unSorted[$i];

                // The string being checked is already in order (comparing with the sorted array).
                if ($sortedKey === $stringsKey) {
                    continue;
                }

                // Apply the changes to the strings array by moving the key to the correct position.
                $keyValue = $strings[$sortedKey];
                // Remove the element to move.
                unset($strings[$sortedKey]);
                // Rebuild the array, with the element in new position.
                $strings = array_slice($strings, 0, $i, true) +
                    [$sortedKey => $keyValue] +
                    array_slice($strings, $i, null, true);
                $this->strings[$group] = $strings; // Update the group array with the rebuilt version.
                $unSorted = array_keys($strings); // Update the unsorted keys array.

                // Now add the required changes to the changeset that we'll be using when fixing the file.
                // The moved content is accumulated on the token just before the start of the string
                // that was occupying this position.
                $insertPtr = $strings[$stringsKey][0] - 1;

                // For every token in the string being moved, delete it and add it in the correct position.
                foreach (range($keyValue[0], $keyValue[1]) as $tokenIndex) {
                    $changeSet[$tokenIndex]['DELETE'] = ''; // Delete the current string token.
                    $changeSet[$insertPtr]['INSERT'] = ($changeSet[$insertPtr]['INSERT'] ?? '') .
                        $tokens[$tokenIndex]['content'];
                }
            }

            // Let's apply the accumulated changes to the file.
            if (!empty($changeSet)) {
                $phpcsFile->fixer->beginChangeset();
                foreach ($changeSet as $tokenIndex => $operations) {
                    if (isset($operations['DELETE'])) {
                        $phpcsFile->fixer->replaceToken($tokenIndex, '');
                    }
                    if (isset($operations['INSERT'])) {
                        $phpcsFile->fixer->addContent($tokenIndex, $operations['INSERT']);
                    }
                }
                $phpcsFile->fixer->endChangeset();
            }
        }
    }

    /**
     * Return the string key corresponding to the string at the pointer.
     * Note that the key has got any quote (single or double) trimmed.
     *
     * When the structure is not exactly $string[KEY] an "UnexpectedSyntax" error is added.
     *
     * @param File $phpcsFile
     * @param int $stackPtr Pointer to the $string variable token.
     * @return string|null The key, or null if the syntax is not the expected one.
     */
    protected function getStringKey(File $phpcsFile, int $stackPtr): ?string {
        $tokens = $phpcsFile->getTokens();

        // If the structure is not exactly: $string[KEY], add error and return null.
        // Missing tokens (variable too close to the end of the file) also count as unexpected syntax.
        if (
            ($tokens[$stackPtr + 1]['code'] ?? null) !== T_OPEN_SQUARE_BRACKET ||
            ($tokens[$stackPtr + 2]['code'] ?? null) !== T_CONSTANT_ENCAPSED_STRING ||
            ($tokens[$stackPtr + 3]['code'] ?? null) !== T_CLOSE_SQUARE_BRACKET
        ) {
            $phpcsFile->addError(
                "Unexpected string syntax, it should be `\$string['key']`",
                $stackPtr,
                'UnexpectedSyntax'
            );
            return null;
        }

        // Now we can safely extract the string key and return it.
        return trim($tokens[$stackPtr + 2]['content'], "'\"");
    }

    /**
     * Return the string final pointer, it should be always a \n after a T_SEMICOLON.
     *
     * @param File $phpcsFile
     * @param int $stackPtr Pointer to the $string variable token.
     * @return int|null The pointer to the end of the string, or null if it's not an expected string end.
     */
    protected function getStringEnd(File $phpcsFile, int $stackPtr): ?int {
        $tokens = $phpcsFile->getTokens();

        $semicolonPtr = $phpcsFile->findNext(T_SEMICOLON, $stackPtr + 1);
        if ($semicolonPtr === false) {
            // No semicolon at all. Doing arithmetic with false would point to an unrelated
            // token at the beginning of the file, so there is no valid end to return.
            return null;
        }
        $currentEndToken = $semicolonPtr + 1;

        // Verify that the current end token is a line feed, if not, we won't be able to fix (swap).
        if (
            !isset($tokens[$currentEndToken]) ||
            $tokens[$currentEndToken]['code'] !== T_WHITESPACE ||
            $tokens[$currentEndToken]['content'] !== "\n"
        ) {
            return null; // This is not an expected string end.
        }

        return $currentEndToken;
    }

    /**
     * Tell if the token at the pointer is followed by a comment, either immediately
     * or after exactly one whitespace token.
     *
     * @param array $tokens The tokens of the file.
     * @param int $stackPtr Pointer to the token to look after.
     * @return bool
     */
    protected function isFollowedByComment(array $tokens, int $stackPtr): bool {
        $nextCode = $tokens[$stackPtr + 1]['code'] ?? null; // Null when it's the end of the file.
        if ($nextCode === T_COMMENT) {
            return true;
        }

        return $nextCode === T_WHITESPACE && ($tokens[$stackPtr + 2]['code'] ?? null) === T_COMMENT;
    }
}

0 commit comments

Comments
 (0)