Skip to content

Commit 00565e8

Browse files
authored
Merge pull request #577 from nextcloud/feat/parallel-generate-all
feat: implement spawning multiple workers for generate-all
2 parents 36b2e35 + 2c35001 commit 00565e8

File tree

2 files changed

+177
-14
lines changed

2 files changed

+177
-14
lines changed

lib/Command/Generate.php

Lines changed: 123 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
namespace OCA\PreviewGenerator\Command;
1111

1212
use OCA\Files_External\Service\GlobalStoragesService;
13+
use OCA\PreviewGenerator\Model\WorkerConfig;
1314
use OCA\PreviewGenerator\SizeHelper;
1415
use OCP\Encryption\IManager;
1516
use OCP\Files\File;
@@ -32,6 +33,9 @@
3233
use Symfony\Component\Console\Output\OutputInterface;
3334

3435
class Generate extends Command {
36+
private const ENV_WORKER_CONF = 'PREVIEWGENERATOR_WORKER_CONF';
37+
private const OPT_WORKERS = 'workers';
38+
3539
/* @return array{width: int, height: int, crop: bool} */
3640
protected array $specifications;
3741

@@ -44,6 +48,8 @@ class Generate extends Command {
4448
protected IManager $encryptionManager;
4549
protected SizeHelper $sizeHelper;
4650

51+
private ?WorkerConfig $workerConfig = null;
52+
4753
public function __construct(IRootFolder $rootFolder,
4854
IUserManager $userManager,
4955
IPreview $previewGenerator,
@@ -80,6 +86,11 @@ protected function configure(): void {
8086
'p',
8187
InputOption::VALUE_OPTIONAL | InputOption::VALUE_IS_ARRAY,
8288
'limit scan to this path, eg. --path="/alice/files/Photos", the user_id is determined by the path and all user_id arguments are ignored, multiple usages allowed'
89+
)->addOption(
90+
self::OPT_WORKERS,
91+
'w',
92+
InputOption::VALUE_OPTIONAL,
93+
'Spawn multiple parallel workers to increase speed of preview generation',
8394
);
8495
}
8596

@@ -95,10 +106,76 @@ protected function execute(InputInterface $input, OutputInterface $output): int
95106
$this->output = $output;
96107

97108
$this->specifications = $this->sizeHelper->generateSpecifications();
98-
if ($this->output->getVerbosity() > OutputInterface::VERBOSITY_VERY_VERBOSE) {
109+
if ($this->output->getVerbosity() > OutputInterface::VERBOSITY_VERY_VERBOSE
110+
&& !getenv(self::ENV_WORKER_CONF)
111+
) {
99112
$output->writeln('Specifications: ' . json_encode($this->specifications));
100113
}
101114

115+
if (getenv(self::ENV_WORKER_CONF)) {
116+
return $this->executeWorker($input);
117+
}
118+
119+
if ($input->getOption(self::OPT_WORKERS)) {
120+
return $this->executeCoordinator($input);
121+
}
122+
123+
return $this->executeDefault($input);
124+
}
125+
126+
/**
 * Spawn the requested number of worker processes and wait for all of them to finish.
 *
 * Every worker is a fork of the current process that re-executes the original command
 * line with its WorkerConfig passed as JSON in the ENV_WORKER_CONF environment variable.
 *
 * @return int 0 if every worker was spawned and exited with code 0, 1 otherwise
 */
private function executeCoordinator(InputInterface $input): int {
	$workerCount = (int)$input->getOption(self::OPT_WORKERS);
	if ($workerCount <= 0) {
		$this->output->writeln("<error>Invalid worker count: $workerCount</error>");
		return 1;
	}

	if (!function_exists('pcntl_fork') || !function_exists('pcntl_exec')) {
		$this->output->writeln('<error>The pcntl extension is required to spawn workers</error>');
		return 1;
	}

	$argv = $_SERVER['argv'];
	$workerFailed = false;
	$workerPids = [];
	for ($i = 0; $i < $workerCount; $i++) {
		$this->output->writeln("Spawning worker $i");

		// Build the environment before forking so that an encoding error surfaces in the parent
		$env = getenv();
		$env[self::ENV_WORKER_CONF] = json_encode(new WorkerConfig($i, $workerCount), JSON_THROW_ON_ERROR);

		$pid = pcntl_fork();
		if ($pid === -1) {
			$this->output->writeln('<error>Failed to fork worker</error>');
			// Stop spawning, but still wait for the workers that are already running
			$workerFailed = true;
			break;
		}

		if ($pid === 0) {
			// Child: pcntl_exec() only returns if replacing the process image failed.
			// Exit right away, otherwise the child would continue this loop and fork
			// workers of its own.
			pcntl_exec($argv[0], array_slice($argv, 1), $env);
			$this->output->writeln("<error>Failed to execute worker $i</error>");
			exit(1);
		}

		// Parent
		$workerPids[$i] = $pid;
	}

	foreach ($workerPids as $index => $pid) {
		$status = 0;
		if (pcntl_waitpid($pid, $status) === -1) {
			$workerFailed = true;
			$this->output->writeln("<error>Failed to wait for worker $index</error>");
			continue;
		}

		// The exit status is only meaningful if the worker exited normally. A worker
		// that was killed by a signal must not be mistaken for a successful one.
		if (!pcntl_wifexited($status)) {
			$workerFailed = true;
			$this->output->writeln("<error>Worker $index terminated abnormally</error>");
			continue;
		}

		$exitCode = pcntl_wexitstatus($status);
		if ($exitCode !== 0) {
			$workerFailed = true;
		}

		$this->output->writeln("Worker $index exited with code $exitCode");
	}

	return $workerFailed ? 1 : 0;
}
170+
171+
/**
 * Run as a worker process: restore the WorkerConfig that was passed as JSON in the
 * ENV_WORKER_CONF environment variable and then run the regular generation.
 *
 * @return int 1 if the worker configuration is malformed, otherwise the result of executeDefault()
 */
private function executeWorker(InputInterface $input): int {
	$workerConfigEnv = getenv(self::ENV_WORKER_CONF);

	try {
		$data = json_decode((string)$workerConfigEnv, true, 512, JSON_THROW_ON_ERROR);
		// Valid JSON may still be a scalar or null, which fromJson() cannot accept
		if (!is_array($data)) {
			throw new \InvalidArgumentException('Invalid worker data: Expected a JSON object');
		}

		$this->workerConfig = WorkerConfig::fromJson($data);
	} catch (\JsonException|\InvalidArgumentException $e) {
		$error = $e->getMessage();
		$this->output->writeln("<error>Invalid worker configuration: {$error}</error>");
		return 1;
	}

	return $this->executeDefault($input);
}
177+
178+
private function executeDefault(InputInterface $input): int {
102179
$inputPaths = $input->getOption('path');
103180
if ($inputPaths) {
104181
foreach ($inputPaths as $inputPath) {
@@ -177,11 +254,15 @@ private function parseFolder(Folder $folder, array $noPreviewMountPaths): void {
177254
// Respect the '.nomedia' file. If present don't traverse the folder
178255
// Same for external mounts with previews disabled
179256
if ($folder->nodeExists('.nomedia') || in_array($folderPath, $noPreviewMountPaths)) {
180-
$this->output->writeln('Skipping folder ' . $folderPath);
257+
if ($this->workerConfig === null) {
258+
$this->output->writeln('Skipping folder ' . $folderPath);
259+
}
181260
return;
182261
}
183262

184-
$this->output->writeln('Scanning folder ' . $folderPath);
263+
if ($this->workerConfig === null) {
264+
$this->output->writeln('Scanning folder ' . $folderPath);
265+
}
185266

186267
$nodes = $folder->getDirectoryListing();
187268

@@ -201,20 +282,48 @@ private function parseFolder(Folder $folder, array $noPreviewMountPaths): void {
201282
}
202283

203284
/**
 * Generate previews for the given file if its mime type is supported.
 *
 * When running as a worker, only files whose hashed id falls into this worker's
 * bucket are processed. All other files are skipped silently.
 */
private function parseFile(File $file): void {
	if (!$this->previewGenerator->isMimeSupported($file->getMimeType())) {
		return;
	}

	if ($this->workerConfig !== null) {
		$workerCount = $this->workerConfig->getWorkerCount();
		$hash = $this->hashFileId($file->getId());
		// PHP's % operator keeps the sign of the dividend. Normalise the remainder to
		// 0..workerCount-1 so that a negative hash is still assigned to a worker
		// instead of being skipped by all of them.
		$bucket = (($hash % $workerCount) + $workerCount) % $workerCount;
		if ($bucket !== $this->workerConfig->getWorkerIndex()) {
			return;
		}
	}

	if ($this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE) {
		$prefix = '';
		if ($this->workerConfig !== null) {
			$workerIndex = $this->workerConfig->getWorkerIndex();
			$prefix = "[WORKER $workerIndex] ";
		}
		$this->output->writeln("{$prefix}Generating previews for " . $file->getPath());
	}

	try {
		$this->previewGenerator->generatePreviews($file, $this->specifications);
	} catch (NotFoundException $e) {
		// Maybe log that previews could not be generated?
	} catch (\InvalidArgumentException|GenericFileException $e) {
		$class = $e::class;
		$error = $e->getMessage();
		$this->output->writeln("<error>{$class}: {$error}</error>");
	}
}
315+
316+
/**
 * Hash the given file id into a non-negative integer to ensure even distribution of
 * work between workers.
 *
 * The sign bit of the unpacked digest is cleared because PHP's % operator keeps the
 * sign of the dividend: a negative hash would yield a negative remainder that never
 * matches a worker index.
 *
 * @return int A deterministic hash in the range 0..PHP_INT_MAX
 */
private function hashFileId(int $fileId): int {
	// Fall back to 32 bit hash on 32 bit systems
	if (PHP_INT_SIZE === 4) {
		$digest = hash('xxh32', (string)$fileId, true);
		return unpack('l', $digest)[1] & PHP_INT_MAX;
	}

	$digest = hash('xxh3', (string)$fileId, true);
	return unpack('q', $digest)[1] & PHP_INT_MAX;
}
220329
}

lib/Model/WorkerConfig.php

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/**
6+
* SPDX-FileCopyrightText: 2025 Richard Steinmetz
7+
* SPDX-License-Identifier: AGPL-3.0-or-later
8+
*/
9+
10+
namespace OCA\PreviewGenerator\Model;
11+
12+
/**
 * Immutable description of a single worker: its index and the total number of workers.
 *
 * Serialises to a JSON object with the keys "workerIndex" and "workerCount" and can be
 * restored from the decoded form of that object via fromJson().
 */
final class WorkerConfig implements \JsonSerializable {
	private const WORKER_INDEX_KEY = 'workerIndex';
	private const WORKER_COUNT_KEY = 'workerCount';

	public function __construct(
		private readonly int $workerIndex,
		private readonly int $workerCount,
	) {
	}

	/**
	 * Restore a worker config from decoded JSON data.
	 *
	 * @param array $data Decoded JSON object as produced by jsonSerialize()
	 * @throws \InvalidArgumentException If the given JSON data is not valid
	 */
	public static function fromJson(array $data): self {
		$workerIndex = $data[self::WORKER_INDEX_KEY] ?? null;
		if (!is_int($workerIndex)) {
			throw new \InvalidArgumentException('Invalid worker data: Missing worker index');
		}

		$workerCount = $data[self::WORKER_COUNT_KEY] ?? null;
		if (!is_int($workerCount)) {
			throw new \InvalidArgumentException('Invalid worker data: Missing worker count');
		}

		// A count below 1 would lead to a division by zero (or a negative modulus)
		// wherever work is distributed by "hash % workerCount".
		if ($workerCount < 1) {
			throw new \InvalidArgumentException('Invalid worker data: Worker count must be at least 1');
		}

		if ($workerIndex < 0 || $workerIndex >= $workerCount) {
			throw new \InvalidArgumentException('Invalid worker data: Worker index is out of range');
		}

		return new self($workerIndex, $workerCount);
	}

	public function getWorkerIndex(): int {
		return $this->workerIndex;
	}

	public function getWorkerCount(): int {
		return $this->workerCount;
	}

	/**
	 * @return array{workerIndex: int, workerCount: int}
	 */
	public function jsonSerialize(): array {
		return [
			self::WORKER_INDEX_KEY => $this->workerIndex,
			self::WORKER_COUNT_KEY => $this->workerCount,
		];
	}
}

0 commit comments

Comments
 (0)