-
Notifications
You must be signed in to change notification settings - Fork 7
[WIP][FEATURE] Add dbdoctor rule for orphaned translation records #171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ErHaWeb
wants to merge
1
commit into
lolli42:main
Choose a base branch
from
ErHaWeb:feature/orphaned-translation-records
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
154 changes: 154 additions & 0 deletions
154
Classes/HealthCheck/TcaTablesTranslatedLanguageNotInSiteConfiguration.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,154 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Lolli\Dbdoctor\HealthCheck; | ||
|
|
||
| /* | ||
| * This file is part of the TYPO3 CMS project. | ||
| * | ||
| * It is free software; you can redistribute it and/or modify it under | ||
| * the terms of the GNU General Public License, either version 2 | ||
| * of the License, or any later version. | ||
| * | ||
| * For the full copyright and license information, please read the | ||
| * LICENSE.txt file that was distributed with this source code. | ||
| * | ||
| * The TYPO3 project - inspiring people to share! | ||
| */ | ||
| use Lolli\Dbdoctor\Helper\TableHelper; | ||
| use Symfony\Component\Console\Style\SymfonyStyle; | ||
| use TYPO3\CMS\Core\Database\Connection; | ||
| use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction; | ||
| use TYPO3\CMS\Core\Exception\SiteNotFoundException; | ||
| use TYPO3\CMS\Core\Site\Entity\Site; | ||
| use TYPO3\CMS\Core\Site\SiteFinder; | ||
| use TYPO3\CMS\Core\Utility\GeneralUtility; | ||
|
|
||
| /** | ||
| * Translated records should reference a sys_language_uid that is configured in the | ||
| * site configuration of the page they are located on. Records with a language not | ||
| * in the site configuration are orphaned translations typically created by DataHandler | ||
| * copy operations between sites. | ||
| */ | ||
| final class TcaTablesTranslatedLanguageNotInSiteConfiguration extends AbstractHealthCheck implements HealthCheckInterface | ||
| { | ||
| private SiteFinder $siteFinder; | ||
|
|
||
| public function __construct(SiteFinder $siteFinder) | ||
| { | ||
| $this->siteFinder = $siteFinder; | ||
| } | ||
|
|
||
| public function header(SymfonyStyle $io): void | ||
| { | ||
| $io->section('Scan for translated records with language not in site configuration'); | ||
| $this->outputClass($io); | ||
| $this->outputTags($io, self::TAG_SOFT_DELETE, self::TAG_REMOVE, self::TAG_WORKSPACE_REMOVE); | ||
| $io->text([ | ||
| 'Translated records reference a sys_language_uid. This language must be configured', | ||
| 'in the site configuration of the page they are located on. This check finds records', | ||
| 'with a sys_language_uid that does not exist in the site configuration and removes them.', | ||
| ]); | ||
| } | ||
|
|
||
| protected function getAffectedRecords(): array | ||
| { | ||
| /** @var TableHelper $tableHelper */ | ||
| $tableHelper = $this->container->get(TableHelper::class); | ||
|
|
||
| /** @var array<int, Site|false> $siteCache */ | ||
| $siteCache = []; | ||
| /** @var array<string, array<int, true>> $siteLanguageCache */ | ||
| $siteLanguageCache = []; | ||
|
|
||
| $affectedRows = []; | ||
| foreach ($this->tcaHelper->getNextLanguageAwareTcaTable() as $tableName) { | ||
| if (!$tableHelper->tableExistsInDatabase($tableName)) { | ||
| // TCA may define tables not yet present in database schema. | ||
| continue; | ||
| } | ||
|
|
||
| /** @var string $languageField */ | ||
| $languageField = $this->tcaHelper->getLanguageField($tableName); | ||
| $workspaceIdField = $this->tcaHelper->getWorkspaceIdField($tableName); | ||
| $isTableWorkspaceAware = !empty($workspaceIdField); | ||
|
|
||
| $selectFields = [ | ||
| 'uid', | ||
| 'pid', | ||
| $languageField, | ||
| ]; | ||
| if ($isTableWorkspaceAware) { | ||
| $selectFields[] = $workspaceIdField; | ||
| $selectFields[] = 't3ver_state'; | ||
| } | ||
|
|
||
| $queryBuilder = $this->connectionPool->getQueryBuilderForTable($tableName); | ||
| // Do not consider already deleted records: Those are not visible and will not cause | ||
| // issues. Reducing the number of affected records avoids unnecessary noise. | ||
| $queryBuilder->getRestrictions()->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class)); | ||
| $queryBuilder | ||
| ->select(...$selectFields) | ||
| ->from($tableName) | ||
| ->where( | ||
| $queryBuilder->expr()->gt($languageField, $queryBuilder->createNamedParameter(0, Connection::PARAM_INT)), | ||
| $queryBuilder->expr()->neq($languageField, $queryBuilder->createNamedParameter(-1, Connection::PARAM_INT)) | ||
| ) | ||
| ->orderBy('uid'); | ||
|
|
||
| if ($isTableWorkspaceAware) { | ||
| // Skip DELETE_PLACEHOLDER records (t3ver_state = 2), those are workspace internals. | ||
| $queryBuilder->andWhere( | ||
| $queryBuilder->expr()->neq('t3ver_state', $queryBuilder->createNamedParameter(2, Connection::PARAM_INT)) | ||
| ); | ||
| } | ||
|
|
||
| $result = $queryBuilder->executeQuery(); | ||
| while ($row = $result->fetchAssociative()) { | ||
| /** @var array<string, int|string> $row */ | ||
| $pid = (int)$row['pid']; | ||
| $langId = (int)$row[$languageField]; | ||
|
|
||
| // Resolve site for this pid, cached. Records on pages without site config | ||
| // (e.g. pid 0 or pages not below a site root) are skipped: No site means | ||
| // no language configuration to validate against. | ||
| if (!array_key_exists($pid, $siteCache)) { | ||
| try { | ||
| $siteCache[$pid] = $this->siteFinder->getSiteByPageId($pid); | ||
| } catch (SiteNotFoundException) { | ||
| $siteCache[$pid] = false; | ||
| } | ||
| } | ||
| if ($siteCache[$pid] === false) { | ||
| continue; | ||
| } | ||
| $site = $siteCache[$pid]; | ||
|
|
||
| $siteIdentifier = $site->getIdentifier(); | ||
| if (!isset($siteLanguageCache[$siteIdentifier])) { | ||
| $siteLanguageCache[$siteIdentifier] = []; | ||
| foreach ($site->getAllLanguages() as $siteLanguage) { | ||
| $siteLanguageCache[$siteIdentifier][$siteLanguage->getLanguageId()] = true; | ||
| } | ||
| } | ||
|
|
||
| if (!isset($siteLanguageCache[$siteIdentifier][$langId])) { | ||
| $row['_reasonBroken'] = 'LanguageNotInSiteConfiguration'; | ||
| $affectedRows[$tableName][(int)$row['uid']] = $row; | ||
| } | ||
| } | ||
| } | ||
| return $affectedRows; | ||
| } | ||
|
|
||
| protected function processRecords(SymfonyStyle $io, bool $simulate, array $affectedRecords): void | ||
| { | ||
| $this->softOrHardDeleteRecords($io, $simulate, $affectedRecords); | ||
| } | ||
|
|
||
| protected function recordDetails(SymfonyStyle $io, array $affectedRecords): void | ||
| { | ||
| $this->outputRecordDetails($io, $affectedRecords, '_reasonBroken'); | ||
| } | ||
| } | ||
186 changes: 186 additions & 0 deletions
186
Classes/HealthCheck/TcaTablesTranslatedLanguagePageTranslationMissing.php
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace Lolli\Dbdoctor\HealthCheck; | ||
|
|
||
| /* | ||
| * This file is part of the TYPO3 CMS project. | ||
| * | ||
| * It is free software; you can redistribute it and/or modify it under | ||
| * the terms of the GNU General Public License, either version 2 | ||
| * of the License, or any later version. | ||
| * | ||
| * For the full copyright and license information, please read the | ||
| * LICENSE.txt file that was distributed with this source code. | ||
| * | ||
| * The TYPO3 project - inspiring people to share! | ||
| */ | ||
| use Lolli\Dbdoctor\Helper\TableHelper; | ||
| use Symfony\Component\Console\Style\SymfonyStyle; | ||
| use TYPO3\CMS\Core\Database\Connection; | ||
| use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction; | ||
| use TYPO3\CMS\Core\Exception\SiteNotFoundException; | ||
| use TYPO3\CMS\Core\Site\Entity\Site; | ||
| use TYPO3\CMS\Core\Site\SiteFinder; | ||
| use TYPO3\CMS\Core\Utility\GeneralUtility; | ||
|
|
||
| /** | ||
| * Translated records (except pages) need a corresponding page translation to exist. | ||
| * A record translated to a language for which no page translation exists on the same | ||
| * pid is orphaned and should be removed. This check only considers languages that are | ||
| * configured in the site configuration. | ||
| */ | ||
| final class TcaTablesTranslatedLanguagePageTranslationMissing extends AbstractHealthCheck implements HealthCheckInterface | ||
| { | ||
| private SiteFinder $siteFinder; | ||
|
|
||
| public function __construct(SiteFinder $siteFinder) | ||
| { | ||
| $this->siteFinder = $siteFinder; | ||
| } | ||
|
|
||
| public function header(SymfonyStyle $io): void | ||
| { | ||
| $io->section('Scan for translated records without corresponding page translation'); | ||
| $this->outputClass($io); | ||
| $this->outputTags($io, self::TAG_SOFT_DELETE, self::TAG_REMOVE, self::TAG_WORKSPACE_REMOVE); | ||
| $io->text([ | ||
| 'Translated records need a corresponding page translation on their pid to be valid.', | ||
| 'When a page translation for a given language does not exist, content records translated', | ||
| 'to that language are orphaned. This check finds and removes such records. Only languages', | ||
| 'configured in the site configuration are considered.', | ||
| ]); | ||
| } | ||
|
|
||
| protected function getAffectedRecords(): array | ||
| { | ||
| /** @var TableHelper $tableHelper */ | ||
| $tableHelper = $this->container->get(TableHelper::class); | ||
|
|
||
| if (!$tableHelper->tableExistsInDatabase('pages')) { | ||
| // pages table is required for page translation lookups. | ||
| return []; | ||
| } | ||
|
|
||
| /** @var array<int, Site|false> $siteCache */ | ||
| $siteCache = []; | ||
| /** @var array<string, array<int, true>> $siteLanguageCache */ | ||
| $siteLanguageCache = []; | ||
| /** @var array<int, array<int, true>> $pageTranslationCache */ | ||
| $pageTranslationCache = []; | ||
|
|
||
| $affectedRows = []; | ||
| foreach ($this->tcaHelper->getNextLanguageAwareTcaTable(['pages']) as $tableName) { | ||
| if (!$tableHelper->tableExistsInDatabase($tableName)) { | ||
| // TCA may define tables not yet present in database schema. | ||
| continue; | ||
| } | ||
|
|
||
| /** @var string $languageField */ | ||
| $languageField = $this->tcaHelper->getLanguageField($tableName); | ||
| $workspaceIdField = $this->tcaHelper->getWorkspaceIdField($tableName); | ||
| $isTableWorkspaceAware = !empty($workspaceIdField); | ||
|
|
||
| $selectFields = [ | ||
| 'uid', | ||
| 'pid', | ||
| $languageField, | ||
| ]; | ||
| if ($isTableWorkspaceAware) { | ||
| $selectFields[] = $workspaceIdField; | ||
| $selectFields[] = 't3ver_state'; | ||
| } | ||
|
|
||
| $queryBuilder = $this->connectionPool->getQueryBuilderForTable($tableName); | ||
| // Do not consider already deleted records: Those are not visible and will not cause | ||
| // issues. Reducing the number of affected records avoids unnecessary noise. | ||
| $queryBuilder->getRestrictions()->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class)); | ||
| $queryBuilder | ||
| ->select(...$selectFields) | ||
| ->from($tableName) | ||
| ->where( | ||
| $queryBuilder->expr()->gt($languageField, $queryBuilder->createNamedParameter(0, Connection::PARAM_INT)), | ||
| $queryBuilder->expr()->neq($languageField, $queryBuilder->createNamedParameter(-1, Connection::PARAM_INT)) | ||
| ) | ||
| ->orderBy('uid'); | ||
|
|
||
| if ($isTableWorkspaceAware) { | ||
| // Skip DELETE_PLACEHOLDER records (t3ver_state = 2), those are workspace internals. | ||
| $queryBuilder->andWhere( | ||
| $queryBuilder->expr()->neq('t3ver_state', $queryBuilder->createNamedParameter(2, Connection::PARAM_INT)) | ||
| ); | ||
| } | ||
|
|
||
| $result = $queryBuilder->executeQuery(); | ||
| while ($row = $result->fetchAssociative()) { | ||
| /** @var array<string, int|string> $row */ | ||
| $pid = (int)$row['pid']; | ||
| $langId = (int)$row[$languageField]; | ||
|
|
||
| // Resolve site for this pid, cached. Records on pages without site config | ||
| // (e.g. pid 0 or pages not below a site root) are skipped: No site means | ||
| // no language configuration to validate against. | ||
| if (!array_key_exists($pid, $siteCache)) { | ||
| try { | ||
| $siteCache[$pid] = $this->siteFinder->getSiteByPageId($pid); | ||
| } catch (SiteNotFoundException) { | ||
| $siteCache[$pid] = false; | ||
| } | ||
| } | ||
| if ($siteCache[$pid] === false) { | ||
| continue; | ||
| } | ||
| $site = $siteCache[$pid]; | ||
|
|
||
| // Only check languages that are configured in the site. Records with a language | ||
| // not in site config are handled by TcaTablesTranslatedLanguageNotInSiteConfiguration. | ||
| $siteIdentifier = $site->getIdentifier(); | ||
| if (!isset($siteLanguageCache[$siteIdentifier])) { | ||
| $siteLanguageCache[$siteIdentifier] = []; | ||
| foreach ($site->getAllLanguages() as $siteLanguage) { | ||
| $siteLanguageCache[$siteIdentifier][$siteLanguage->getLanguageId()] = true; | ||
| } | ||
| } | ||
| if (!isset($siteLanguageCache[$siteIdentifier][$langId])) { | ||
| continue; | ||
| } | ||
|
|
||
| // Check if a page translation exists for this pid and language, cached. | ||
| // Do not consider deleted page translations: A deleted page translation means | ||
| // content translations on that page are orphaned and should be removed. | ||
| if (!isset($pageTranslationCache[$pid])) { | ||
| $pageTranslationCache[$pid] = []; | ||
| $pageQueryBuilder = $this->connectionPool->getQueryBuilderForTable('pages'); | ||
| $pageQueryBuilder->getRestrictions()->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class)); | ||
| $pageResult = $pageQueryBuilder | ||
| ->select('sys_language_uid') | ||
| ->from('pages') | ||
| ->where( | ||
| $pageQueryBuilder->expr()->eq('l10n_parent', $pageQueryBuilder->createNamedParameter($pid, Connection::PARAM_INT)), | ||
| $pageQueryBuilder->expr()->gt('sys_language_uid', $pageQueryBuilder->createNamedParameter(0, Connection::PARAM_INT)) | ||
| ) | ||
| ->executeQuery(); | ||
| while ($pageRow = $pageResult->fetchAssociative()) { | ||
| $pageTranslationCache[$pid][(int)$pageRow['sys_language_uid']] = true; | ||
| } | ||
| } | ||
| if (!isset($pageTranslationCache[$pid][$langId])) { | ||
| $row['_reasonBroken'] = 'MissingPageTranslation'; | ||
| $affectedRows[$tableName][(int)$row['uid']] = $row; | ||
| } | ||
| } | ||
| } | ||
| return $affectedRows; | ||
| } | ||
|
|
||
| protected function processRecords(SymfonyStyle $io, bool $simulate, array $affectedRecords): void | ||
| { | ||
| $this->softOrHardDeleteRecords($io, $simulate, $affectedRecords); | ||
| } | ||
|
|
||
| protected function recordDetails(SymfonyStyle $io, array $affectedRecords): void | ||
| { | ||
| $this->outputRecordDetails($io, $affectedRecords, '_reasonBroken'); | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
Tests/Functional/Fixtures/TcaTablesTranslatedLanguageNotInSiteConfigurationFixed.csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| "sys_workspace" | ||
| ,"uid","pid","deleted","title" | ||
| ,1,0,0,"ws-1" | ||
| "pages" | ||
| ,"uid","pid","deleted","sys_language_uid","l10n_parent","t3ver_wsid","title" | ||
| ,1,0,0,0,0,0,"Site root with languages 0 and 1" | ||
| ,2,1,0,0,0,0,"Sub page 1" | ||
| ,3,1,0,1,2,0,"Ok page translation lang 1 for sub page 1" | ||
| ,4,1,1,2,2,0,"Not ok page translation lang 2 for sub page 1" | ||
| "tt_content" | ||
| ,"uid","pid","deleted","sys_language_uid","l18n_parent","t3ver_wsid","t3ver_state","header" | ||
| ,1,2,0,0,0,0,0,"Ok default content on sub page 1" | ||
| ,2,2,0,1,1,0,0,"Ok lang 1 content on sub page 1" | ||
| ,3,2,1,3,1,0,0,"Not ok lang 3 content on sub page 1" |
18 changes: 18 additions & 0 deletions
18
Tests/Functional/Fixtures/TcaTablesTranslatedLanguageNotInSiteConfigurationImport.csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| "sys_workspace" | ||
| ,"uid","pid","deleted","title" | ||
| ,1,0,0,"ws-1" | ||
| "pages" | ||
| ,"uid","pid","deleted","sys_language_uid","l10n_parent","t3ver_wsid","title" | ||
| ,1,0,0,0,0,0,"Site root with languages 0 and 1" | ||
| ,2,1,0,0,0,0,"Sub page 1" | ||
| ,3,1,0,1,2,0,"Ok page translation lang 1 for sub page 1" | ||
| # Should be set deleted=1 - language 2 is not in site config | ||
| ,4,1,0,2,2,0,"Not ok page translation lang 2 for sub page 1" | ||
| "tt_content" | ||
| ,"uid","pid","deleted","sys_language_uid","l18n_parent","t3ver_wsid","t3ver_state","header" | ||
| ,1,2,0,0,0,0,0,"Ok default content on sub page 1" | ||
| ,2,2,0,1,1,0,0,"Ok lang 1 content on sub page 1" | ||
| # Should be set deleted=1 - language 3 is not in site config | ||
| ,3,2,0,3,1,0,0,"Not ok lang 3 content on sub page 1" | ||
| # Should be removed - language 3 not in site config and workspace record | ||
| ,4,2,0,3,1,1,0,"Not ok lang 3 ws content on sub page 1" |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not yet fully convinced here: delete_placeholders are overlays for live records to say "this live record does not exist in this workspace anymore". their sys_language_uid value may be irrelevant so it may be ok to skip them here. BUT: if we delete a live record here that has a delete_placeholder in some workspace, this delete_placeholder would be orphaned. As such, I guess we should probably establish another check looking for such cases, probably rather late in the chain. Can be done with a separate issue and patch, though.