Skip to content

Commit a3f5beb

Browse files
Introduce analyze processors
1 parent 30c341d commit a3f5beb

File tree

12 files changed

+1632
-905
lines changed

12 files changed

+1632
-905
lines changed

src/Analyzer/ConfluenceAnalyzer.php

Lines changed: 177 additions & 905 deletions
Large diffs are not rendered by default.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<?php
2+
3+
namespace HalloWelt\MigrateConfluence\Analyzer;
4+
5+
use DOMDocument;
6+
use DOMElement;
7+
use Psr\Log\LoggerInterface;
8+
use Symfony\Component\Console\Output\OutputInterface;
9+
10+
/**
 * Contract for one processing step of the analyze phase.
 *
 * A processor is handed the DOM of the document under analysis via
 * execute() and exposes the data it worked on through getData().
 */
interface IAnalyzerProcessor {

	/**
	 * Run the processor against the given document.
	 *
	 * @param DOMDocument $dom Document to analyze
	 * @return void
	 */
	public function execute( DOMDocument $dom ): void;

	/**
	 * Names of the data keys this processor needs to have available.
	 *
	 * @return array
	 */
	public function getRequiredKeys(): array;

	/**
	 * Names of the data keys this processor provides.
	 *
	 * @return array
	 */
	public function getKeys(): array;

	/**
	 * Data stored under the given key.
	 *
	 * @param string $key
	 * @return array
	 */
	public function getData( string $key ): array;

	/**
	 * Inject the console output used for progress messages.
	 *
	 * @param OutputInterface $output
	 * @return void
	 */
	public function setOutput( OutputInterface $output ): void;

	/**
	 * Inject the logger.
	 *
	 * @param LoggerInterface $logger
	 * @return void
	 */
	public function setLogger( LoggerInterface $logger ): void;
}
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
<?php
2+
3+
namespace HalloWelt\MigrateConfluence\Analyzer\Processor;
4+
5+
use DOMDocument;
6+
use DOMElement;
7+
use HalloWelt\MediaWiki\Lib\Migration\InvalidTitleException;
8+
use HalloWelt\MediaWiki\Lib\Migration\TitleBuilder as GenericTitleBuilder;
9+
use HalloWelt\MigrateConfluence\Utility\FilenameBuilder;
10+
use HalloWelt\MigrateConfluence\Utility\XMLHelper;
11+
use SplFileInfo;
12+
13+
/**
 * Fallback processor for attachments.
 *
 * Looks at the first `Attachment` object of a document and, if it is a
 * "current" attachment that is known to be available but has not been added
 * yet, computes its target filename and records it in the data maps.
 *
 * NOTE(review): `$this->data`, `$this->output` and `$this->logger` are not
 * declared here; presumably they are provided by ProcessorBase - confirm.
 */
class AttachmentFallback extends ProcessorBase {

	/** @var XMLHelper */
	protected $xmlHelper;

	/** @var mixed ID of the attachment currently being processed */
	private $attachmentId;

	/** @var string Original filename of the attachment currently being processed */
	private $attachmentOrigFilename = '';

	/**
	 * @inheritDoc
	 */
	public function getRequiredKeys(): array {
		return [
			'analyze-attachment-available-ids',
			'analyze-added-attachment-id',
			'analyze-attachment-id-to-orig-filename-map',
			'analyze-attachment-id-to-space-id-map',
			'analyze-attachment-id-to-reference-map',
			'global-page-id-to-title-map',
			'analyze-page-id-to-confluence-key-map',
			'global-space-id-to-prefix-map',
			'analyze-add-file',
			'global-attachment-orig-filename-target-filename-map',
			'debug-analyze-invalid-titles-attachment-id-to-title',
			'global-filenames-to-filetitles-map',
			'analyze-attachment-id-to-target-filename-map',
			'global-title-attachments'
		];
	}

	/**
	 * NOTE(review): process() also writes to 'analyze-added-attachment-id',
	 * which is not listed here, and 'global-additional-files' is listed here
	 * but not in getRequiredKeys() - verify against ProcessorBase whether
	 * that is intended.
	 *
	 * @inheritDoc
	 */
	public function getKeys(): array {
		return [
			'debug-analyze-invalid-titles-attachment-id-to-title',
			'global-additional-files',
			'analyze-add-file',
			'global-attachment-orig-filename-target-filename-map',
			'global-filenames-to-filetitles-map',
			'analyze-attachment-id-to-target-filename-map',
			'global-title-attachments'
		];
	}

	/**
	 * Validate the first `Attachment` object of the document and hand it to
	 * process() if it qualifies for the fallback.
	 *
	 * @inheritDoc
	 */
	public function doExecute( DOMDocument $dom ): void {
		$this->xmlHelper = new XMLHelper( $dom );

		$objectNodes = $this->xmlHelper->getObjectNodes( 'Attachment' );
		if ( count( $objectNodes ) < 1 ) {
			return;
		}
		$objectNode = $objectNodes->item( 0 );
		if ( $objectNode instanceof DOMElement === false ) {
			return;
		}

		// Only the current version of an attachment is of interest.
		// The cast guards against a missing property (null).
		$attachmentNodeContentStatus = $this->xmlHelper->getPropertyValue( 'contentStatus', $objectNode );
		if ( strtolower( (string)$attachmentNodeContentStatus ) !== 'current' ) {
			return;
		}

		$this->attachmentId = $this->xmlHelper->getIDNodeValue( $objectNode );
		// Already handled by an earlier step
		if ( in_array( $this->attachmentId, $this->data['analyze-added-attachment-id'] ) ) {
			return;
		}
		// No file available for this attachment
		if ( !in_array( $this->attachmentId, $this->data['analyze-attachment-available-ids'] ) ) {
			return;
		}
		if ( !isset( $this->data['analyze-attachment-id-to-orig-filename-map'][$this->attachmentId] ) ) {
			return;
		}
		$this->attachmentOrigFilename = $this->data['analyze-attachment-id-to-orig-filename-map'][$this->attachmentId];

		$this->process( $objectNode );
	}

	/**
	 * Compute the target filename for the current attachment and record it,
	 * either as attachment of its container page or as additional file.
	 *
	 * @param DOMElement $node The `Attachment` object node
	 * @return void
	 */
	private function process( DOMElement $node ): void {
		// Check to which page attachment belongs
		$targetTitle = '';
		$confluenceKey = '';
		$containerContentId = $this->xmlHelper->getPropertyValue( 'containerContent', $node );
		if ( $containerContentId !== null ) {
			if ( isset( $this->data['global-page-id-to-title-map'][$containerContentId] ) ) {
				$targetTitle = $this->data['global-page-id-to-title-map'][$containerContentId];
			}
			// An attachment that references a container page we know nothing about is skipped
			if ( !isset( $this->data['analyze-page-id-to-confluence-key-map'][$containerContentId] ) ) {
				return;
			}
			$confluenceKey = $this->data['analyze-page-id-to-confluence-key-map'][$containerContentId];
		}

		// TODO: Is this wise?
		$attachmentSpaceId = 0;
		if ( isset( $this->data['analyze-attachment-id-to-space-id-map'][$this->attachmentId] ) ) {
			$attachmentSpaceId = $this->data['analyze-attachment-id-to-space-id-map'][$this->attachmentId];
		}

		$attachmentTargetFilename = $this->makeAttachmentTargetFilenameFromData(
			$confluenceKey, $this->attachmentId, $attachmentSpaceId, $this->attachmentOrigFilename,
			$targetTitle, $this->data['global-space-id-to-prefix-map']
		);
		if ( $attachmentTargetFilename === '' ) {
			$this->data['debug-analyze-invalid-titles-attachment-id-to-title'][$this->attachmentId] = $attachmentTargetFilename;
			return;
		}

		if ( !isset( $this->data['analyze-attachment-id-to-reference-map'][$this->attachmentId] ) ) {
			$this->output->writeln(
				//phpcs:ignore Generic.Files.LineLength.TooLong
				"\033[31m\t- File '$this->attachmentId' ($attachmentTargetFilename) not found\033[39m"
			);
			return;
		}

		$attachmentReference = $this->data['analyze-attachment-id-to-reference-map'][$this->attachmentId];

		if ( $confluenceKey !== '' ) {
			// Attachment belongs to a known page
			$this->data['global-title-attachments'][$targetTitle][] = $attachmentTargetFilename;
			$this->output->writeln( "Add attachment $attachmentTargetFilename (fallback: {$confluenceKey})" );
		} else {
			// No container page: keep it as a standalone additional file
			$this->data['global-additional-files'][$attachmentTargetFilename] = $attachmentReference;
			$this->output->writeln( "Add attachment $attachmentTargetFilename (additional)" );
		}

		$this->data['analyze-add-file'][$attachmentTargetFilename] = $attachmentReference;
		$this->data['analyze-added-attachment-id'][] = $this->attachmentId;

		$confluenceFileKey = str_replace( ' ', '', "{$confluenceKey}---{$this->attachmentOrigFilename}" );
		$this->data['global-filenames-to-filetitles-map'][$confluenceFileKey] = $attachmentTargetFilename;
		$this->data['analyze-attachment-id-to-target-filename-map'][$this->attachmentId] = $attachmentTargetFilename;
		$this->data['global-attachment-orig-filename-target-filename-map'][$this->attachmentOrigFilename] = $attachmentTargetFilename;
	}

	/**
	 * Build the target filename of an attachment and register it in
	 * 'global-filenames-to-filetitles-map' under "<page>---<orig filename>"
	 * (spaces replaced by underscores).
	 *
	 * If the full container title yields an invalid title, a second attempt
	 * is made with only the basename of the container title. If that fails
	 * as well, the invalid title is recorded, logged and returned as is.
	 *
	 * @param string $pageConfluenceTitle
	 * @param int $attachmentId
	 * @param int $attachmentSpaceId
	 * @param string $attachmentOrigFilename
	 * @param string $containerTitle
	 * @param array $spaceIdToPrefixMap
	 * @return string
	 */
	private function makeAttachmentTargetFilenameFromData(
		string $pageConfluenceTitle, int $attachmentId, int $attachmentSpaceId,
		string $attachmentOrigFilename, string $containerTitle, array $spaceIdToPrefixMap
	): string {
		$filenameBuilder = new FilenameBuilder( $spaceIdToPrefixMap, null );
		try {
			$targetName = $filenameBuilder->buildFromAttachmentData(
				$attachmentSpaceId, $attachmentOrigFilename, $containerTitle );
		} catch ( InvalidTitleException $e ) {
			try {
				// Probably it is just too long. Let's try to use a shortened variant
				// This is not ideal, but should be okay as a fallback in most cases.
				$shortTargetTitle = basename( $containerTitle );
				$targetName = $filenameBuilder->buildFromAttachmentData(
					$attachmentSpaceId, $attachmentOrigFilename, $shortTargetTitle );
			} catch ( InvalidTitleException $ex ) {
				$this->data['debug-analyze-invalid-titles-attachment-id-to-title'][$attachmentId] = $ex->getInvalidTitle();
				$this->logger->error( $ex->getMessage() );
				$targetName = $ex->getInvalidTitle();
			}
		}

		/*
		 * Some attachments do not have a file extension available. We try
		 * to find an extension by looking at the content type, but
		 * sometimes even this won't help... ("octet-stream")
		 */
		$file = new SplFileInfo( $targetName );
		if ( $this->hasNoExplicitFileExtension( $file ) ) {
			$this->logger->debug(
				"Could not find file extension for $attachmentId"
			);
			$targetName .= '.unknown';
		}

		$fileKey = "{$pageConfluenceTitle}---$attachmentOrigFilename";
		// Some normalization
		$fileKey = str_replace( ' ', '_', $fileKey );
		$this->data['global-filenames-to-filetitles-map'][$fileKey] = $targetName;

		return $targetName;
	}

	/**
	 * Whether the filename lacks a usable file extension.
	 *
	 * @param SplFileInfo $file
	 * @return bool
	 */
	private function hasNoExplicitFileExtension( SplFileInfo $file ): bool {
		$extension = $file->getExtension();
		if ( $extension === '' ) {
			return true;
		}
		// Evil hack for Names like "02.1 Some-Workflow File": everything after
		// the dot would be taken as extension, so overly long "extensions"
		// are treated as missing.
		if ( strlen( $extension ) > 10 ) {
			return true;
		}
		return false;
	}
}

0 commit comments

Comments
 (0)