Skip to content

Commit 0101c5a

Browse files
committed
[WIP][FEATURE] Add dbdoctor rule for orphaned translation records
Introduce two new dbdoctor checks that detect orphaned translation records created by copy operations before proper language filtering was enforced. Together they flag translated records that are unusable in their target context because either (a) the translation language is not configured in the target site, or (b) the target page lacks a pages translation for the record language. This covers copy scenarios both within a single site and across different root sites. The checks follow the semantics of DataHandler::copyL10nOverlayRecords() and provide safe fix suggestions (soft-delete where supported).
1 parent 986a1a5 commit 0101c5a

9 files changed

+587
-0
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Lolli\Dbdoctor\HealthCheck;
6+
7+
/*
8+
* This file is part of the TYPO3 CMS project.
9+
*
10+
* It is free software; you can redistribute it and/or modify it under
11+
* the terms of the GNU General Public License, either version 2
12+
* of the License, or any later version.
13+
*
14+
* For the full copyright and license information, please read the
15+
* LICENSE.txt file that was distributed with this source code.
16+
*
17+
* The TYPO3 project - inspiring people to share!
18+
*/
19+
use Lolli\Dbdoctor\Helper\TableHelper;
20+
use Symfony\Component\Console\Style\SymfonyStyle;
21+
use TYPO3\CMS\Core\Database\Connection;
22+
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
23+
use TYPO3\CMS\Core\Exception\SiteNotFoundException;
24+
use TYPO3\CMS\Core\Site\Entity\Site;
25+
use TYPO3\CMS\Core\Site\SiteFinder;
26+
use TYPO3\CMS\Core\Utility\GeneralUtility;
27+
28+
/**
 * Translated records should reference a sys_language_uid that is configured in the
 * site configuration of the page they are located on. Records with a language not
 * in the site configuration are orphaned translations typically created by DataHandler
 * copy operations between sites.
 *
 * Rows of table "pages" are validated against the site of their default language
 * page (l10n_parent) instead of their pid: A page translation shares the pid of its
 * default language page, so the translation of a site root has pid 0, which never
 * resolves to a site and would otherwise never be checked.
 */
final class TcaTablesTranslatedLanguageNotInSiteConfiguration extends AbstractHealthCheck implements HealthCheckInterface
{
    private SiteFinder $siteFinder;

    public function __construct(SiteFinder $siteFinder)
    {
        $this->siteFinder = $siteFinder;
    }

    /**
     * Print the section title, class name, tags and a short description of this check.
     */
    public function header(SymfonyStyle $io): void
    {
        $io->section('Scan for translated records with language not in site configuration');
        $this->outputClass($io);
        $this->outputTags($io, self::TAG_SOFT_DELETE, self::TAG_REMOVE, self::TAG_WORKSPACE_REMOVE);
        $io->text([
            'Translated records reference a sys_language_uid. This language must be configured',
            'in the site configuration of the page they are located on. This check finds records',
            'with a sys_language_uid that does not exist in the site configuration and removes them.',
        ]);
    }

    /**
     * Collect all not-deleted translated rows whose language is not part of the
     * site they belong to.
     *
     * @return array<string, array<int, array<string, int|string>>> Rows grouped by table name, keyed by uid.
     *         Each row carries uid, pid, the language field, the workspace fields of
     *         workspace aware tables and a "_reasonBroken" marker.
     */
    protected function getAffectedRecords(): array
    {
        /** @var TableHelper $tableHelper */
        $tableHelper = $this->container->get(TableHelper::class);

        /** @var array<int, Site|null> $siteCache */
        $siteCache = [];
        /** @var array<string, array<int, true>> $siteLanguageCache */
        $siteLanguageCache = [];

        $affectedRows = [];
        foreach ($this->tcaHelper->getNextLanguageAwareTcaTable() as $tableName) {
            if (!$tableHelper->tableExistsInDatabase($tableName)) {
                // TCA may define tables not yet present in database schema.
                continue;
            }

            /** @var string $languageField */
            $languageField = $this->tcaHelper->getLanguageField($tableName);
            $workspaceIdField = $this->tcaHelper->getWorkspaceIdField($tableName);
            $isTableWorkspaceAware = !empty($workspaceIdField);
            $isPagesTable = $tableName === 'pages';

            $selectFields = [
                'uid',
                'pid',
                $languageField,
            ];
            if ($isTableWorkspaceAware) {
                $selectFields[] = $workspaceIdField;
                $selectFields[] = 't3ver_state';
            }
            if ($isPagesTable) {
                // Only needed to resolve the site, removed again before a row is stored.
                $selectFields[] = 'l10n_parent';
            }

            $queryBuilder = $this->connectionPool->getQueryBuilderForTable($tableName);
            // Do not consider already deleted records: Those are not visible and will not cause
            // issues. Reducing the number of affected records avoids unnecessary noise.
            $queryBuilder->getRestrictions()->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            $queryBuilder
                ->select(...$selectFields)
                ->from($tableName)
                ->where(
                    // "> 0" selects translations only. It also excludes -1, so no
                    // separate "!= -1" condition is needed.
                    $queryBuilder->expr()->gt($languageField, $queryBuilder->createNamedParameter(0, Connection::PARAM_INT))
                )
                ->orderBy('uid');

            if ($isTableWorkspaceAware) {
                // Skip DELETE_PLACEHOLDER records (t3ver_state = 2), those are workspace internals.
                $queryBuilder->andWhere(
                    $queryBuilder->expr()->neq('t3ver_state', $queryBuilder->createNamedParameter(2, Connection::PARAM_INT))
                );
            }

            $result = $queryBuilder->executeQuery();
            while (($row = $result->fetchAssociative()) !== false) {
                /** @var array<string, int|string> $row */
                $langId = (int)$row[$languageField];

                // Page used to find the responsible site: The pid for ordinary records,
                // the default language page for page translations.
                $sitePageId = (int)$row['pid'];
                if ($isPagesTable) {
                    $defaultLanguagePageUid = (int)$row['l10n_parent'];
                    // Keep stored rows in the same shape as for all other tables.
                    unset($row['l10n_parent']);
                    if ($defaultLanguagePageUid > 0) {
                        $sitePageId = $defaultLanguagePageUid;
                    }
                }

                // Records on pages without site config (e.g. pid 0 or pages not below a
                // site root) are skipped: No site means no language configuration to
                // validate against.
                $site = $this->findSite($sitePageId, $siteCache);
                if ($site === null) {
                    continue;
                }

                if (!$this->isLanguageConfigured($site, $langId, $siteLanguageCache)) {
                    $row['_reasonBroken'] = 'LanguageNotInSiteConfiguration';
                    $affectedRows[$tableName][(int)$row['uid']] = $row;
                }
            }
        }
        return $affectedRows;
    }

    /**
     * Soft-delete or remove the affected rows via the shared parent class helper.
     */
    protected function processRecords(SymfonyStyle $io, bool $simulate, array $affectedRecords): void
    {
        $this->softOrHardDeleteRecords($io, $simulate, $affectedRecords);
    }

    /**
     * Output details of affected rows including the "_reasonBroken" marker.
     */
    protected function recordDetails(SymfonyStyle $io, array $affectedRecords): void
    {
        $this->outputRecordDetails($io, $affectedRecords, '_reasonBroken');
    }

    /**
     * Resolve the site a page belongs to, remembering hits and misses per page id.
     *
     * @param array<int, Site|null> $siteCache Lookup cache, filled by this method.
     * @return Site|null Null when no site is found for the page.
     */
    private function findSite(int $pageId, array &$siteCache): ?Site
    {
        if (!array_key_exists($pageId, $siteCache)) {
            try {
                $siteCache[$pageId] = $this->siteFinder->getSiteByPageId($pageId);
            } catch (SiteNotFoundException) {
                $siteCache[$pageId] = null;
            }
        }
        return $siteCache[$pageId];
    }

    /**
     * Tell whether a language id is part of all languages of a site.
     *
     * @param array<string, array<int, true>> $siteLanguageCache Language ids per site identifier, filled by this method.
     */
    private function isLanguageConfigured(Site $site, int $languageId, array &$siteLanguageCache): bool
    {
        $siteIdentifier = $site->getIdentifier();
        if (!isset($siteLanguageCache[$siteIdentifier])) {
            $siteLanguageCache[$siteIdentifier] = [];
            foreach ($site->getAllLanguages() as $siteLanguage) {
                $siteLanguageCache[$siteIdentifier][$siteLanguage->getLanguageId()] = true;
            }
        }
        return isset($siteLanguageCache[$siteIdentifier][$languageId]);
    }
}
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Lolli\Dbdoctor\HealthCheck;
6+
7+
/*
8+
* This file is part of the TYPO3 CMS project.
9+
*
10+
* It is free software; you can redistribute it and/or modify it under
11+
* the terms of the GNU General Public License, either version 2
12+
* of the License, or any later version.
13+
*
14+
* For the full copyright and license information, please read the
15+
* LICENSE.txt file that was distributed with this source code.
16+
*
17+
* The TYPO3 project - inspiring people to share!
18+
*/
19+
use Lolli\Dbdoctor\Helper\TableHelper;
20+
use Symfony\Component\Console\Style\SymfonyStyle;
21+
use TYPO3\CMS\Core\Database\Connection;
22+
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
23+
use TYPO3\CMS\Core\Exception\SiteNotFoundException;
24+
use TYPO3\CMS\Core\Site\Entity\Site;
25+
use TYPO3\CMS\Core\Site\SiteFinder;
26+
use TYPO3\CMS\Core\Utility\GeneralUtility;
27+
28+
/**
 * Every translated record outside of table "pages" relies on a page translation of
 * the same language on its pid. If no such page translation exists, the record is
 * orphaned and gets removed. Languages that are not part of the site configuration
 * are ignored by this check.
 */
final class TcaTablesTranslatedLanguagePageTranslationMissing extends AbstractHealthCheck implements HealthCheckInterface
{
    private SiteFinder $siteFinder;

    public function __construct(SiteFinder $siteFinder)
    {
        $this->siteFinder = $siteFinder;
    }

    /**
     * Print the section title, class name, tags and a short description of this check.
     */
    public function header(SymfonyStyle $io): void
    {
        $description = [
            'Translated records need a corresponding page translation on their pid to be valid.',
            'When a page translation for a given language does not exist, content records translated',
            'to that language are orphaned. This check finds and removes such records. Only languages',
            'configured in the site configuration are considered.',
        ];

        $io->section('Scan for translated records without corresponding page translation');
        $this->outputClass($io);
        $this->outputTags($io, self::TAG_SOFT_DELETE, self::TAG_REMOVE, self::TAG_WORKSPACE_REMOVE);
        $io->text($description);
    }

    /**
     * Find not-deleted translated rows of all language aware tables except "pages"
     * whose pid has no not-deleted page translation in the row's language.
     *
     * @return array<string, array<int, array<string, int|string>>> Rows grouped by table name, keyed by uid.
     */
    protected function getAffectedRecords(): array
    {
        /** @var TableHelper $tables */
        $tables = $this->container->get(TableHelper::class);

        // Page translation lookups below are impossible without the pages table.
        if (!$tables->tableExistsInDatabase('pages')) {
            return [];
        }

        /** @var array<int, Site|false> $sitesByPid */
        $sitesByPid = [];
        /** @var array<string, array<int, true>> $languageIdsBySite */
        $languageIdsBySite = [];
        /** @var array<int, array<int, true>> $translatedLanguagesByPid */
        $translatedLanguagesByPid = [];

        $brokenRecords = [];
        foreach ($this->tcaHelper->getNextLanguageAwareTcaTable(['pages']) as $tableName) {
            // A table may be declared in TCA before it exists in the database schema.
            if (!$tables->tableExistsInDatabase($tableName)) {
                continue;
            }

            /** @var string $languageField */
            $languageField = $this->tcaHelper->getLanguageField($tableName);
            $workspaceIdField = $this->tcaHelper->getWorkspaceIdField($tableName);
            $hasWorkspaceSupport = !empty($workspaceIdField);

            $columns = $hasWorkspaceSupport
                ? ['uid', 'pid', $languageField, $workspaceIdField, 't3ver_state']
                : ['uid', 'pid', $languageField];

            $query = $this->connectionPool->getQueryBuilderForTable($tableName);
            // Deleted rows are invisible and harmless, leaving them out keeps the
            // result free of noise.
            $query->getRestrictions()->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            $query->select(...$columns);
            $query->from($tableName);
            $query->where(
                $query->expr()->gt($languageField, $query->createNamedParameter(0, Connection::PARAM_INT)),
                $query->expr()->neq($languageField, $query->createNamedParameter(-1, Connection::PARAM_INT))
            );
            $query->orderBy('uid');
            if ($hasWorkspaceSupport) {
                // t3ver_state = 2 marks DELETE_PLACEHOLDER rows, which are workspace internals.
                $query->andWhere(
                    $query->expr()->neq('t3ver_state', $query->createNamedParameter(2, Connection::PARAM_INT))
                );
            }

            $records = $query->executeQuery();
            while ($record = $records->fetchAssociative()) {
                /** @var array<string, int|string> $record */
                $pid = (int)$record['pid'];
                $languageId = (int)$record[$languageField];

                // No site (e.g. pid 0 or a page outside of any site root) means there
                // is no language configuration to validate against.
                $sitesByPid[$pid] ??= $this->findSiteOrFalse($pid);
                $site = $sitesByPid[$pid];
                if ($site === false) {
                    continue;
                }

                // Languages missing in the site configuration are the business of
                // TcaTablesTranslatedLanguageNotInSiteConfiguration.
                $siteIdentifier = $site->getIdentifier();
                $languageIdsBySite[$siteIdentifier] ??= $this->collectLanguageIds($site);
                if (!isset($languageIdsBySite[$siteIdentifier][$languageId])) {
                    continue;
                }

                $translatedLanguagesByPid[$pid] ??= $this->fetchPageTranslationLanguages($pid);
                if (isset($translatedLanguagesByPid[$pid][$languageId])) {
                    continue;
                }

                $record['_reasonBroken'] = 'MissingPageTranslation';
                $brokenRecords[$tableName][(int)$record['uid']] = $record;
            }
        }

        return $brokenRecords;
    }

    /**
     * Soft-delete or remove the affected rows via the shared parent class helper.
     */
    protected function processRecords(SymfonyStyle $io, bool $simulate, array $affectedRecords): void
    {
        $this->softOrHardDeleteRecords($io, $simulate, $affectedRecords);
    }

    /**
     * Output details of affected rows including the "_reasonBroken" marker.
     */
    protected function recordDetails(SymfonyStyle $io, array $affectedRecords): void
    {
        $this->outputRecordDetails($io, $affectedRecords, '_reasonBroken');
    }

    /**
     * Look up the site of a page.
     *
     * @return Site|false False when the site finder reports no site for the page.
     */
    private function findSiteOrFalse(int $pageId): Site|false
    {
        try {
            return $this->siteFinder->getSiteByPageId($pageId);
        } catch (SiteNotFoundException) {
            return false;
        }
    }

    /**
     * Build a lookup map of all language ids of a site.
     *
     * @return array<int, true>
     */
    private function collectLanguageIds(Site $site): array
    {
        $languageIds = [];
        foreach ($site->getAllLanguages() as $siteLanguage) {
            $languageIds[$siteLanguage->getLanguageId()] = true;
        }
        return $languageIds;
    }

    /**
     * Fetch the languages for which a not-deleted page translation of the given page exists.
     *
     * Deleted page translations are left out on purpose: Content translations that
     * depend on a deleted page translation are orphaned and should be removed.
     *
     * @return array<int, true> Lookup map keyed by sys_language_uid.
     */
    private function fetchPageTranslationLanguages(int $pageUid): array
    {
        $query = $this->connectionPool->getQueryBuilderForTable('pages');
        $query->getRestrictions()->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class));
        $translations = $query
            ->select('sys_language_uid')
            ->from('pages')
            ->where(
                $query->expr()->eq('l10n_parent', $query->createNamedParameter($pageUid, Connection::PARAM_INT)),
                $query->expr()->gt('sys_language_uid', $query->createNamedParameter(0, Connection::PARAM_INT))
            )
            ->executeQuery();

        $languages = [];
        while ($translation = $translations->fetchAssociative()) {
            $languages[(int)$translation['sys_language_uid']] = true;
        }
        return $languages;
    }
}

Classes/HealthFactory/HealthFactory.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ final class HealthFactory implements HealthFactoryInterface
7272
HealthCheck\SysFileReferenceDangling::class,
7373
HealthCheck\TcaTablesTranslatedLanguageParentDeleted::class,
7474
HealthCheck\TcaTablesTranslatedLanguageParentDifferentPid::class,
75+
HealthCheck\TcaTablesTranslatedLanguageNotInSiteConfiguration::class,
76+
HealthCheck\TcaTablesTranslatedLanguagePageTranslationMissing::class,
7577
// TcaTablesInvalidLanguageParent::class,
7678
HealthCheck\InlineForeignFieldChildrenParentMissing::class,
7779
HealthCheck\InlineForeignFieldNoForeignTableFieldChildrenParentMissing::class,
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"sys_workspace"
2+
,"uid","pid","deleted","title"
3+
,1,0,0,"ws-1"
4+
"pages"
5+
,"uid","pid","deleted","sys_language_uid","l10n_parent","t3ver_wsid","title"
6+
,1,0,0,0,0,0,"Site root with languages 0 and 1"
7+
,2,1,0,0,0,0,"Sub page 1"
8+
,3,1,0,1,2,0,"Ok page translation lang 1 for sub page 1"
9+
,4,1,1,2,2,0,"Not ok page translation lang 2 for sub page 1"
10+
"tt_content"
11+
,"uid","pid","deleted","sys_language_uid","l18n_parent","t3ver_wsid","t3ver_state","header"
12+
,1,2,0,0,0,0,0,"Ok default content on sub page 1"
13+
,2,2,0,1,1,0,0,"Ok lang 1 content on sub page 1"
14+
,3,2,1,3,1,0,0,"Not ok lang 3 content on sub page 1"
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"sys_workspace"
2+
,"uid","pid","deleted","title"
3+
,1,0,0,"ws-1"
4+
"pages"
5+
,"uid","pid","deleted","sys_language_uid","l10n_parent","t3ver_wsid","title"
6+
,1,0,0,0,0,0,"Site root with languages 0 and 1"
7+
,2,1,0,0,0,0,"Sub page 1"
8+
,3,1,0,1,2,0,"Ok page translation lang 1 for sub page 1"
9+
# Should be set deleted=1 - language 2 is not in site config
10+
,4,1,0,2,2,0,"Not ok page translation lang 2 for sub page 1"
11+
"tt_content"
12+
,"uid","pid","deleted","sys_language_uid","l18n_parent","t3ver_wsid","t3ver_state","header"
13+
,1,2,0,0,0,0,0,"Ok default content on sub page 1"
14+
,2,2,0,1,1,0,0,"Ok lang 1 content on sub page 1"
15+
# Should be set deleted=1 - language 3 is not in site config
16+
,3,2,0,3,1,0,0,"Not ok lang 3 content on sub page 1"
17+
# Should be removed - language 3 not in site config and workspace record
18+
,4,2,0,3,1,1,0,"Not ok lang 3 ws content on sub page 1"

0 commit comments

Comments
 (0)