From 2d07ee671e66af1dac70600df78c30edd5f9d205 Mon Sep 17 00:00:00 2001 From: Pierre Rineau Date: Thu, 24 Jul 2025 11:47:58 +0200 Subject: [PATCH 1/4] no issue - introduce anonymizator Context, for AbstractAnonymizer signature change in 3.0 --- src/Anonymization/Anonymizator.php | 12 +- .../Anonymizer/AbstractAnonymizer.php | 20 +++- src/Anonymization/Anonymizer/Context.php | 111 ++++++++++++++++++ 3 files changed, 138 insertions(+), 5 deletions(-) create mode 100644 src/Anonymization/Anonymizer/Context.php diff --git a/src/Anonymization/Anonymizator.php b/src/Anonymization/Anonymizator.php index 2adcc576..6fbb6536 100644 --- a/src/Anonymization/Anonymizator.php +++ b/src/Anonymization/Anonymizator.php @@ -6,6 +6,7 @@ use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AbstractAnonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AnonymizerRegistry; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Config\AnonymizationConfig; use MakinaCorpus\DbToolsBundle\Anonymization\Config\AnonymizerConfig; use MakinaCorpus\DbToolsBundle\Helper\Format; @@ -48,6 +49,7 @@ public function __construct( private AnonymizerRegistry $anonymizerRegistry, private AnonymizationConfig $anonymizationConfig, private ?string $salt = null, + private readonly Context $defaultContext = new Context(), ) { $this->logger = new NullLogger(); $this->output = new NullOutput(); @@ -84,12 +86,13 @@ protected function getSalt(): string /** * Create anonymizer instance. */ - protected function createAnonymizer(AnonymizerConfig $config): AbstractAnonymizer + protected function createAnonymizer(AnonymizerConfig $config, Context $context): AbstractAnonymizer { return $this->anonymizerRegistry->createAnonymizer( $config->anonymizer, $config, - $config->options->with(['salt' => $this->getSalt()]), + // @todo "salt" should belong to context instead. 
+ $context->withOptions($config->options->with(['salt' => $this->getSalt()])), $this->databaseSession ); } @@ -127,6 +130,7 @@ public function anonymize( } $plan = []; + $context = clone $this->defaultContext; if ($onlyTargets) { foreach ($onlyTargets as $targetString) { @@ -160,7 +164,7 @@ public function anonymize( foreach ($plan as $table => $targets) { $anonymizers[$table] = []; foreach ($this->anonymizationConfig->getTableConfig($table, $targets) as $target => $config) { - $anonymizers[$table][] = $this->createAnonymizer($config); + $anonymizers[$table][] = $this->createAnonymizer($config, $context); } } @@ -910,7 +914,7 @@ public function checkAnonymizationConfig(): array foreach ($this->anonymizationConfig->all() as $table => $tableConfig) { foreach ($tableConfig as $config) { try { - $this->createAnonymizer($config); + $this->createAnonymizer($config, $this->defaultContext); } catch (\Exception $e) { if (!\key_exists($table, $errors)) { $errors[$table] = []; diff --git a/src/Anonymization/Anonymizer/AbstractAnonymizer.php b/src/Anonymization/Anonymizer/AbstractAnonymizer.php index a8c3c49c..645dc8ac 100644 --- a/src/Anonymization/Anonymizer/AbstractAnonymizer.php +++ b/src/Anonymization/Anonymizer/AbstractAnonymizer.php @@ -18,12 +18,30 @@ abstract class AbstractAnonymizer public const JOIN_TABLE = '_target_table'; public const TEMP_TABLE_PREFIX = '_db_tools_sample_'; + /** + * @todo in 3.0 move this as a constructor-promoted property. + */ + protected readonly Context $context; + protected readonly Options $options; + final public function __construct( protected string $tableName, protected string $columnName, protected DatabaseSession $databaseSession, - protected Options $options, + /** + * @todo In 3.0, Options will be replaced with Context instead. 
+ */ + Options $options, ) { + if ($options instanceof Context) { + $this->context = $options; + $this->options = $options->options; + } else { + \trigger_deprecation('makinacorpus/db-tools-bundle', '2.1.0', \sprintf("%s::__construct() 'Options \$options' will be changed to 'Context \$context' in 3.0", static::class)); + $this->options = $options; + $this->context = new Context($options); + } + $this->validateOptions(); } diff --git a/src/Anonymization/Anonymizer/Context.php b/src/Anonymization/Anonymizer/Context.php new file mode 100644 index 00000000..b785db4b --- /dev/null +++ b/src/Anonymization/Anonymizer/Context.php @@ -0,0 +1,111 @@ +throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function get(string $name, mixed $default = null, bool $required = false): mixed + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function all(): array + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function count(): int + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function with(array $options): Options + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function toDisplayString(): string + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function getString(string $name, ?string $default = null, bool $required = 
false): ?string + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function getBool(string $name, ?bool $default = null, bool $required = false): ?bool + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function getInt(string $name, ?int $default = null, bool $required = false): ?int + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function getFloat(string $name, ?float $default = null, bool $required = false): ?float + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function getDate(string $name, ?\DateTimeImmutable $default = null, bool $required = false): ?\DateTimeImmutable + { + $this->throwOptionsDeprecation(__METHOD__); + } + + #[\Override] + #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] + public function getInterval(string $name, ?\DateInterval $default = null, bool $required = false): ?\DateInterval + { + $this->throwOptionsDeprecation(__METHOD__); + } +} From eba1c0f8811588aa608573bce6766393101a45ca Mon Sep 17 00:00:00 2001 From: Pierre Rineau Date: Thu, 24 Jul 2025 12:11:03 +0200 Subject: [PATCH 2/4] no issue - introduce anonymizator Context, move salt into context --- CHANGELOG.md | 5 + src/Anonymization/Anonymizator.php | 16 +-- .../Anonymizer/AbstractAnonymizer.php | 25 +---- .../Anonymizer/AnonymizerRegistry.php | 4 +- src/Anonymization/Anonymizer/Context.php | 106 ++---------------- .../Anonymizer/Core/EmailAnonymizer.php | 2 +- .../Anonymizer/Core/Md5Anonymizer.php | 2 +- 
.../Core/StringPatternAnonymizer.php | 4 +- .../AbstractMultipleColumnAnonymizerTest.php | 7 +- .../Core/ConstantAnonymizerTest.php | 3 + .../Anonymizer/Core/EmailAnonymizerTest.php | 22 +++- .../Anonymizer/Core/FloatAnonymizerTest.php | 13 ++- .../Anonymizer/Core/IbanBicAnonymizerTest.php | 10 +- .../Anonymizer/Core/IntegerAnonymizerTest.php | 13 ++- .../Core/LoremIpsumAnonymizerTest.php | 8 +- .../Anonymizer/Core/Md5AnonymizerTest.php | 9 +- .../Core/PasswordAnonymizerTest.php | 7 +- .../DbToolsExtensionTest.php | 2 +- 18 files changed, 106 insertions(+), 152 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87385157..49c87257 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ ## Next * [feature] 🌟 String pattern anonymizer, build complex strings by fetching values from other anonymizers. +* [internal] Introduce anonymizer context for carrying environment configuration to anonymizers (#235). +* [bc] Salt in `AbstractAnonymizer::$options->get('salt')` is now in `AbstractAnonymizer::$context->salt` (#235). +* [bc] `AbstractAnonymizer::__construct()` now expects an additional `$context` parameter (#235). +* [bc] `Anonymizator::__construct()` `$salt` parameter was removed (#235). +* [fix] Some minor PHP 8.4 deprecations. 
## 2.0.3 diff --git a/src/Anonymization/Anonymizator.php b/src/Anonymization/Anonymizator.php index 6fbb6536..3f95d4fe 100644 --- a/src/Anonymization/Anonymizator.php +++ b/src/Anonymization/Anonymizator.php @@ -43,16 +43,17 @@ class Anonymizator implements LoggerAwareInterface ]; private OutputInterface $output; + private readonly Context $defaultContext; public function __construct( private DatabaseSession $databaseSession, private AnonymizerRegistry $anonymizerRegistry, private AnonymizationConfig $anonymizationConfig, - private ?string $salt = null, - private readonly Context $defaultContext = new Context(), + ?Context $defaultContext = null, ) { $this->logger = new NullLogger(); $this->output = new NullOutput(); + $this->defaultContext = $defaultContext ?? new Context(); } /** @@ -73,14 +74,10 @@ public function setOutput(OutputInterface $output): self return $this; } + #[\Deprecated(message: "Will be removed in 3.0, use Context::generateRandomSalt() instead.", since: "2.1.0")] public static function generateRandomSalt(): string { - return \base64_encode(\random_bytes(12)); - } - - protected function getSalt(): string - { - return $this->salt ??= self::generateRandomSalt(); + return Context::generateRandomSalt(); } /** @@ -91,8 +88,7 @@ protected function createAnonymizer(AnonymizerConfig $config, Context $context): return $this->anonymizerRegistry->createAnonymizer( $config->anonymizer, $config, - // @todo "salt" should belong to context instead. 
- $context->withOptions($config->options->with(['salt' => $this->getSalt()])), + $context, $this->databaseSession ); } diff --git a/src/Anonymization/Anonymizer/AbstractAnonymizer.php b/src/Anonymization/Anonymizer/AbstractAnonymizer.php index 645dc8ac..43000104 100644 --- a/src/Anonymization/Anonymizer/AbstractAnonymizer.php +++ b/src/Anonymization/Anonymizer/AbstractAnonymizer.php @@ -4,7 +4,6 @@ namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer; -use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizator; use MakinaCorpus\QueryBuilder\DatabaseSession; use MakinaCorpus\QueryBuilder\Expression; use MakinaCorpus\QueryBuilder\ExpressionFactory; @@ -18,30 +17,13 @@ abstract class AbstractAnonymizer public const JOIN_TABLE = '_target_table'; public const TEMP_TABLE_PREFIX = '_db_tools_sample_'; - /** - * @todo in 3.0 move this as a constructor-promoted property. - */ - protected readonly Context $context; - protected readonly Options $options; - final public function __construct( protected string $tableName, protected string $columnName, protected DatabaseSession $databaseSession, - /** - * @todo In 3.0, Options will be replaced with Context instead. - */ - Options $options, + protected readonly Context $context, + protected readonly Options $options, ) { - if ($options instanceof Context) { - $this->context = $options; - $this->options = $options->options; - } else { - \trigger_deprecation('makinacorpus/db-tools-bundle', '2.1.0', \sprintf("%s::__construct() 'Options \$options' will be changed to 'Context \$context' in 3.0", static::class)); - $this->options = $options; - $this->context = new Context($options); - } - $this->validateOptions(); } @@ -95,9 +77,10 @@ protected function getJoinColumn(): Expression /** * Get a random, global salt for anonymizing hashed values. */ + #[\Deprecated(message: "Will be removed in 3.0, use \$this->context->salt instead.", since: "2.1.0")] protected function getSalt(): string { - return $this->options->get('salt') ?? 
Anonymizator::generateRandomSalt(); + return $this->context->salt; } /** diff --git a/src/Anonymization/Anonymizer/AnonymizerRegistry.php b/src/Anonymization/Anonymizer/AnonymizerRegistry.php index ac3f9fd1..86380b0e 100644 --- a/src/Anonymization/Anonymizer/AnonymizerRegistry.php +++ b/src/Anonymization/Anonymizer/AnonymizerRegistry.php @@ -67,12 +67,12 @@ public function getAllAnonymizerMetadata(): array public function createAnonymizer( string $name, AnonymizerConfig $config, - Options $options, + Context $context, DatabaseSession $databaseSession, ): AbstractAnonymizer { $className = $this->getAnonymizerClass($name); - $ret = new $className($config->table, $config->targetName, $databaseSession, $options); + $ret = new $className($config->table, $config->targetName, $databaseSession, $context, $config->options); \assert($ret instanceof AbstractAnonymizer); if ($ret instanceof WithAnonymizerRegistry) { diff --git a/src/Anonymization/Anonymizer/Context.php b/src/Anonymization/Anonymizer/Context.php index b785db4b..7316fe6f 100644 --- a/src/Anonymization/Anonymizer/Context.php +++ b/src/Anonymization/Anonymizer/Context.php @@ -4,108 +4,18 @@ namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer; -/** - * @todo - * Remove "extends Options" in 3.0. Change AbstractAnonymizer::__construct() signature accordingly. 
- */ -class Context extends Options +class Context { - public function __construct( - public Options $options = new Options(), - ) {} - - public function withOptions(Options $options): Context - { - return new self($options); - } - - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - private function throwOptionsDeprecation(string $method): never - { - throw new \LogicException(\sprintf("Calling %s::%s() is forbidden, this method only exists for backward compatibility purpose..", static::class, $method)); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function has(string $name): bool - { - $this->throwOptionsDeprecation(__METHOD__); - } + public readonly string $salt; - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function get(string $name, mixed $default = null, bool $required = false): mixed - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function all(): array - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function count(): int - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function with(array $options): Options - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function toDisplayString(): string - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward 
compatibility.", since: "2.1.0")] - public function getString(string $name, ?string $default = null, bool $required = false): ?string - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function getBool(string $name, ?bool $default = null, bool $required = false): ?bool - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function getInt(string $name, ?int $default = null, bool $required = false): ?int - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function getFloat(string $name, ?float $default = null, bool $required = false): ?float - { - $this->throwOptionsDeprecation(__METHOD__); - } - - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function getDate(string $name, ?\DateTimeImmutable $default = null, bool $required = false): ?\DateTimeImmutable - { - $this->throwOptionsDeprecation(__METHOD__); + public function __construct( + ?string $salt = null, + ) { + $this->salt = $salt ?? 
self::generateRandomSalt(); } - #[\Override] - #[\Deprecated(message: "Only exists for class signature backward compatibility.", since: "2.1.0")] - public function getInterval(string $name, ?\DateInterval $default = null, bool $required = false): ?\DateInterval + public static function generateRandomSalt(): string { - $this->throwOptionsDeprecation(__METHOD__); + return \base64_encode(\random_bytes(12)); } } diff --git a/src/Anonymization/Anonymizer/Core/EmailAnonymizer.php b/src/Anonymization/Anonymizer/Core/EmailAnonymizer.php index ba73ca7d..a570004f 100644 --- a/src/Anonymization/Anonymizer/Core/EmailAnonymizer.php +++ b/src/Anonymization/Anonymizer/Core/EmailAnonymizer.php @@ -38,7 +38,7 @@ public function createAnonymizeExpression(Update $update): Expression $userExpr = $expr->column($this->columnName, $this->tableName); if ($this->options->getBool('use_salt', true)) { - $userExpr = $expr->concat($userExpr, $expr->value($this->getSalt())); + $userExpr = $expr->concat($userExpr, $expr->value($this->context->salt)); } $emailHashExpr = $expr->md5($userExpr); diff --git a/src/Anonymization/Anonymizer/Core/Md5Anonymizer.php b/src/Anonymization/Anonymizer/Core/Md5Anonymizer.php index 0d37c80e..04a1126d 100644 --- a/src/Anonymization/Anonymizer/Core/Md5Anonymizer.php +++ b/src/Anonymization/Anonymizer/Core/Md5Anonymizer.php @@ -27,7 +27,7 @@ public function createAnonymizeExpression(Update $update): Expression $columnExpr = $expr->column($this->columnName, $this->tableName); if ($this->options->get('use_salt', true)) { - $columnExpr = $expr->concat($columnExpr, $expr->value($this->getSalt())); + $columnExpr = $expr->concat($columnExpr, $expr->value($this->context->salt)); // Work around some RDBMS not seeing the NULL value anymore // once we added the string concat. 
diff --git a/src/Anonymization/Anonymizer/Core/StringPatternAnonymizer.php b/src/Anonymization/Anonymizer/Core/StringPatternAnonymizer.php index 890dc979..fdb9f99a 100644 --- a/src/Anonymization/Anonymizer/Core/StringPatternAnonymizer.php +++ b/src/Anonymization/Anonymizer/Core/StringPatternAnonymizer.php @@ -194,14 +194,14 @@ private function getAnonymizer(string $anonymizer, ?Options $options = null, int return $ret; } - $config = new AnonymizerConfig($this->tableName, $this->columnName, $anonymizer, new Options()); + $config = new AnonymizerConfig($this->tableName, $this->columnName, $anonymizer, $options ?? new Options()); return $this->childAnonymizers[$key] = $this ->getAnonymizerRegistry() ->createAnonymizer( $anonymizer, $config, - $options ?? new Options(), + $this->context, $this->databaseSession ) ; diff --git a/tests/Unit/Anonymization/Anonymizer/AbstractMultipleColumnAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/AbstractMultipleColumnAnonymizerTest.php index 0d0f9bb4..4811128f 100644 --- a/tests/Unit/Anonymization/Anonymizer/AbstractMultipleColumnAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/AbstractMultipleColumnAnonymizerTest.php @@ -5,6 +5,7 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AbstractMultipleColumnAnonymizer; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Attribute\AsAnonymizer; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; @@ -17,6 +18,7 @@ public function testValidateOptionsOkWithAllColumnOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'column_1' => 'actual_column_1', 'column_2' => 'actual_column_2', @@ -32,6 +34,7 @@ public function testValidateOptionsOkWithSomeColumnOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new 
Context(), new Options([ 'column_2' => 'actual_column_2', ]), @@ -48,7 +51,8 @@ public function testValidateOptionsKoWithNoOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), - new Options([]), + new Context(), + new Options(), ); } @@ -60,6 +64,7 @@ public function testValidateOptionsKoWithColumnMapedTwice(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'column_1' => 'actual_column_1', 'column_2' => 'actual_column_1', diff --git a/tests/Unit/Anonymization/Anonymizer/Core/ConstantAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/ConstantAnonymizerTest.php index ef096895..8815f95e 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/ConstantAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/ConstantAnonymizerTest.php @@ -4,6 +4,7 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\ConstantAnonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; @@ -16,6 +17,7 @@ public function testValidateOptionsOkWithValueOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'value' => 'test', ]), @@ -32,6 +34,7 @@ public function testValidateOptionsKoWithValueOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([]), ); } diff --git a/tests/Unit/Anonymization/Anonymizer/Core/EmailAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/EmailAnonymizerTest.php index 3274cd02..3cc5a01a 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/EmailAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/EmailAnonymizerTest.php @@ -4,8 +4,9 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; -use 
MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\EmailAnonymizer; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; class EmailAnonymizerTest extends UnitTestCase @@ -16,7 +17,8 @@ public function testValidateOptionsOkWithNoOption(): void 'some_table', 'email', $this->getDatabaseSession(), - new Options([]), + new Context(), + new Options(), ); self::expectNotToPerformAssertions(); @@ -28,6 +30,7 @@ public function testValidateOptionsOkWithDomainOptionAsString(): void 'some_table', 'email', $this->getDatabaseSession(), + new Context(), new Options([ 'domain' => 'makina-corpus.com', ]), @@ -44,6 +47,7 @@ public function testValidateOptionsKoWithDomainNotStringable(): void 'some_table', 'email', $this->getDatabaseSession(), + new Context(), new Options([ 'domain' => ['ttt', 'ttt'], ]), @@ -56,6 +60,7 @@ public function testValidateOptionsOkWithUseSaltOptionAsBool(): void 'some_table', 'email', $this->getDatabaseSession(), + new Context(), new Options([ 'use_salt' => true, ]), @@ -72,6 +77,7 @@ public function testValidateOptionsKoWithUseSaltOptionAsNoneBool(): void 'some_table', 'email', $this->getDatabaseSession(), + new Context(), new Options([ 'use_salt' => ['true'], ]), @@ -86,9 +92,10 @@ public function testAnonymizeWithDefaultDomain(): void 'some_table', 'email', $this->getDatabaseSession(), - new Options([ - 'salt' => 'my_salt', - ]) + new Context( + salt: 'my_salt', + ), + new Options(), ); $instance->anonymize($update); @@ -121,9 +128,11 @@ public function testAnonymize(): void 'some_table', 'email', $this->getDatabaseSession(), + new Context( + salt: 'my_salt', + ), new Options([ 'domain' => 'makina-corpus.com', - 'salt' => 'my_salt', ]), ); @@ -157,6 +166,7 @@ public function testAnonymizeWithoutSalt(): void 'some_table', 'email', 
$this->getDatabaseSession(), + new Context(), new Options([ 'use_salt' => false, ]), diff --git a/tests/Unit/Anonymization/Anonymizer/Core/FloatAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/FloatAnonymizerTest.php index 9c253e8c..0af4d050 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/FloatAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/FloatAnonymizerTest.php @@ -4,6 +4,7 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\FloatAnonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; @@ -16,6 +17,7 @@ public function testValidateOptionsOkWithMinMax(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 12.5, 'max' => 14.5, @@ -33,6 +35,7 @@ public function testValidateOptionsKoWithMinGreaterThanMax(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 15.5, 'max' => 14.5, @@ -46,6 +49,7 @@ public function testValidateOptionsOkWithDelta(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'delta' => 12.5, ]), @@ -62,6 +66,7 @@ public function testValidateOptionsKoWithDeltaLesserThan0(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'delta' => -12.5, ]), @@ -74,6 +79,7 @@ public function testValidateOptionsOkWithPercent(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'percent' => 12, ]), @@ -90,6 +96,7 @@ public function testValidateOptionsKoWithPercentLesserThan0(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'percent' => -12, ]), @@ -104,6 +111,7 @@ public function testValidateOptionsKoWithTooManyOptions(): void 
'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 12.5, 'max' => 14.5, @@ -117,6 +125,7 @@ public function testValidateOptionsKoWithTooManyOptions(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 12.5, 'max' => 14.5, @@ -130,6 +139,7 @@ public function testValidateOptionsKoWithTooManyOptions(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'delta' => 14.5, 'percent' => 14.5, @@ -145,7 +155,8 @@ public function testValidateOptionsKoWithoutOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), - new Options([]), + new Context(), + new Options(), ); } } diff --git a/tests/Unit/Anonymization/Anonymizer/Core/IbanBicAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/IbanBicAnonymizerTest.php index 0bcb166d..597e65a2 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/IbanBicAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/IbanBicAnonymizerTest.php @@ -4,8 +4,9 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; -use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\IbanBicAnonymizer; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; class IbanBicAnonymizerTest extends UnitTestCase @@ -16,6 +17,7 @@ public function testValidateOptionsOkWithNoOptionButColumns(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', @@ -31,6 +33,7 @@ public function testValidateOptionsOkWithAllOptions(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', @@ -50,6 +53,7 @@ public function 
testValidateOptionsKoWithNegativeSampleSize(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', @@ -66,6 +70,7 @@ public function testValidateOptionsKoWithZeroSampleSize(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', @@ -82,6 +87,7 @@ public function testValidateOptionsKoWithCountryTooLong(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', @@ -98,6 +104,7 @@ public function testValidateOptionsKoWithCountryNotLetters(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', @@ -114,6 +121,7 @@ public function testValidateOptionsKoWithCountryNotString(): void 'some_table', 'iban', $this->getDatabaseSession(), + new Context(), new Options([ 'iban' => 'foo_iban', 'bic' => 'foo_bic', diff --git a/tests/Unit/Anonymization/Anonymizer/Core/IntegerAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/IntegerAnonymizerTest.php index 0b1227aa..16d70373 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/IntegerAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/IntegerAnonymizerTest.php @@ -4,6 +4,7 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\IntegerAnonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; @@ -16,6 +17,7 @@ public function testValidateOptionsOkWithMinMax(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 12, 'max' => 14, @@ -33,6 +35,7 @@ public function testValidateOptionsKoWithMinGreaterThanMax(): void 'some_table', 'some_column', 
$this->getDatabaseSession(), + new Context(), new Options([ 'min' => 15, 'max' => 14, @@ -46,6 +49,7 @@ public function testValidateOptionsOkWithDelta(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'delta' => 12, ]), @@ -62,6 +66,7 @@ public function testValidateOptionsKoWithDeltaLesserThan0(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'delta' => -12, ]), @@ -74,6 +79,7 @@ public function testValidateOptionsOkWithPercent(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'percent' => 12, ]), @@ -90,6 +96,7 @@ public function testValidateOptionsKoWithPercentLesserThan0(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'percent' => -12, ]), @@ -104,6 +111,7 @@ public function testValidateOptionsKoWithTooManyOptions(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 12, 'max' => 14, @@ -117,6 +125,7 @@ public function testValidateOptionsKoWithTooManyOptions(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'min' => 12, 'max' => 14, @@ -130,6 +139,7 @@ public function testValidateOptionsKoWithTooManyOptions(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'delta' => 14, 'percent' => 14, @@ -145,7 +155,8 @@ public function testValidateOptionsKoWithoutOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), - new Options([]), + new Context(), + new Options(), ); } } diff --git a/tests/Unit/Anonymization/Anonymizer/Core/LoremIpsumAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/LoremIpsumAnonymizerTest.php index 40a96618..ddf152fb 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/LoremIpsumAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/LoremIpsumAnonymizerTest.php @@ -4,6 +4,7 @@ 
namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\LoremIpsumAnonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; @@ -16,7 +17,8 @@ public function testValidateOptionsOkWithNoOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), - new Options([]), + new Context(), + new Options(), ); self::expectNotToPerformAssertions(); @@ -28,6 +30,7 @@ public function testValidateOptionsOkWithWords(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'words' => 15, ]), @@ -44,6 +47,7 @@ public function testValidateOptionsKoWithWordsLesserThan0(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'words' => -15, ]), @@ -56,6 +60,7 @@ public function testValidateOptionsOkWithParagraphs(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'paragraphs' => 15, ]), @@ -72,6 +77,7 @@ public function testValidateOptionsKoWithParagraphsLesserThan0(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'paragraphs' => -15, ]), diff --git a/tests/Unit/Anonymization/Anonymizer/Core/Md5AnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/Md5AnonymizerTest.php index 4283bf99..1f56f2f8 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/Md5AnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/Md5AnonymizerTest.php @@ -4,8 +4,9 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; -use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\Md5Anonymizer; +use 
MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; class Md5AnonymizerTest extends UnitTestCase @@ -18,9 +19,8 @@ public function testAnonymize(): void 'some_table', 'some_column', $this->getDatabaseSession(), - new Options([ - 'salt' => 'my_salt', - ]) + new Context(salt: 'my_salt'), + new Options(), ); $instance->anonymize($update); @@ -54,6 +54,7 @@ public function testAnonymizeWithoutSalt(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'use_salt' => false, ]) diff --git a/tests/Unit/Anonymization/Anonymizer/Core/PasswordAnonymizerTest.php b/tests/Unit/Anonymization/Anonymizer/Core/PasswordAnonymizerTest.php index d9f6a653..95f079a8 100644 --- a/tests/Unit/Anonymization/Anonymizer/Core/PasswordAnonymizerTest.php +++ b/tests/Unit/Anonymization/Anonymizer/Core/PasswordAnonymizerTest.php @@ -4,6 +4,7 @@ namespace MakinaCorpus\DbToolsBundle\Tests\Unit\Anonymization\Anonymizer\Core; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core\PasswordAnonymizer; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Options; use MakinaCorpus\DbToolsBundle\Test\UnitTestCase; @@ -16,7 +17,8 @@ public function testValidateOptionsOkWithNoOption(): void 'some_table', 'some_column', $this->getDatabaseSession(), - new Options([]), + new Context(), + new Options(), ); self::expectNotToPerformAssertions(); @@ -28,6 +30,7 @@ public function testValidateOptionsOkWithPassword(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'password' => 'test', ]), @@ -42,6 +45,7 @@ public function testValidateOptionsOkWithAlgorithm(): void 'some_table', 'some_column', $this->getDatabaseSession(), + new Context(), new Options([ 'algorithm' => 'bcrypt', ]), @@ -58,6 +62,7 @@ public function testValidateOptionsKoWithUnknownAlgorithm(): void 'some_table', 'some_column', 
$this->getDatabaseSession(), + new Context(), new Options([ 'algorithm' => 'toto', ]), diff --git a/tests/Unit/Bridge/Symfony/DependencyInjection/DbToolsExtensionTest.php b/tests/Unit/Bridge/Symfony/DependencyInjection/DbToolsExtensionTest.php index c57d0122..85b3e49c 100644 --- a/tests/Unit/Bridge/Symfony/DependencyInjection/DbToolsExtensionTest.php +++ b/tests/Unit/Bridge/Symfony/DependencyInjection/DbToolsExtensionTest.php @@ -32,7 +32,7 @@ private function getContainer(array $parameters = [], array $bundles = []): Cont return $container; } - private function testExtension(array $config, ContainerBuilder $container = null): void + private function testExtension(array $config, ?ContainerBuilder $container = null): void { $container ??= $this->getContainer(); $extension = new DbToolsExtension(); From 8c32e1a29c034a9f1912c8a698ef61443672591d Mon Sep 17 00:00:00 2001 From: Pierre Rineau Date: Wed, 23 Jul 2025 14:01:47 +0200 Subject: [PATCH 3/4] #226 - document base path for file loading --- .../content/anonymization/core-anonymizers.md | 1 + .../core-anonymizers/file-resolution.md | 39 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 docs/content/anonymization/core-anonymizers/file-resolution.md diff --git a/docs/content/anonymization/core-anonymizers.md b/docs/content/anonymization/core-anonymizers.md index dbc34d57..383ca782 100644 --- a/docs/content/anonymization/core-anonymizers.md +++ b/docs/content/anonymization/core-anonymizers.md @@ -19,3 +19,4 @@ This page list all *Anonymizers* provided by *DbToolsBundle*. 
+ diff --git a/docs/content/anonymization/core-anonymizers/file-resolution.md b/docs/content/anonymization/core-anonymizers/file-resolution.md new file mode 100644 index 00000000..ae21c508 --- /dev/null +++ b/docs/content/anonymization/core-anonymizers/file-resolution.md @@ -0,0 +1,39 @@ +## File name resolution + +In various places you can configure relative file names in order to load data, +here is how relative file names are resolved. +**All relative file names will be considered relative to a given _base path_.** + +The default base path is always stable but depends upon your selected flavor. + +@todo examples + +@@@ symfony + +When parsing Symfony configuration, base path will always be the project +directory, known as `%kernel.project_dir%` variable in Symfony configuration. +This is the directory where your `composer.json` file is located. + +@todo examples + +@@@ +@@@ laravel + +When parsing Laravel configuration, base path will always be the project +directory, as returned by the `base_path()` Laravel function. + +@todo examples + +@@@ +@@@ standalone docker + +When parsing configuration in the standalone CLI version or in docker context, +base path will be the directory of the YAML file currently being parsed. + +:::tip +If you set the `workdir` option in your configuration file, then it will +override the file directory and use it as the base path.
+ +@todo link to `workdir` documentation +::: +@@@ From ab5160b69a7d1fd9ee62eb46a9c6569e1a3bf97c Mon Sep 17 00:00:00 2001 From: Pierre Rineau Date: Tue, 22 Jul 2025 14:21:12 +0200 Subject: [PATCH 4/4] feature: file enum anonymizer, inject samples in database from a plain text or csv file --- CHANGELOG.md | 1 + .../content/anonymization/core-anonymizers.md | 1 + .../anonymization/core-anonymizers/address.md | 7 + .../core-anonymizers/file-enum.md | 79 +++++++ .../core-anonymizers/iban-bic.md | 6 + src/Anonymization/AnonymizatorFactory.php | 20 +- .../Anonymizer/AnonymizerRegistry.php | 1 + src/Anonymization/Anonymizer/Context.php | 3 + .../Anonymizer/Core/FileEnumAnonymizer.php | 65 ++++++ src/Bridge/Laravel/DbToolsServiceProvider.php | 2 + src/Bridge/Standalone/Bootstrap.php | 2 +- .../Symfony/Resources/config/services.yaml | 1 + src/Helper/FileReader.php | 211 ++++++++++++++++++ .../Core/FileEnumAnonymizerTest.php | 127 +++++++++++ .../Pack/resources/enum-file-header.csv | 3 + .../Pack/resources/enum-file-header.txt | 5 + .../Pack/resources/enum-file.csv | 4 + .../Pack/resources/enum-file.tsv | 2 + .../Pack/resources/enum-file.txt | 4 + .../Anonymization/Pack/resources/hexasmal.csv | 19 ++ tests/Unit/Helper/FileReaderTest.php | 93 ++++++++ 21 files changed, 654 insertions(+), 2 deletions(-) create mode 100644 docs/content/anonymization/core-anonymizers/file-enum.md create mode 100644 src/Anonymization/Anonymizer/Core/FileEnumAnonymizer.php create mode 100644 src/Helper/FileReader.php create mode 100644 tests/Functional/Anonymizer/Core/FileEnumAnonymizerTest.php create mode 100644 tests/Resources/Anonymization/Pack/resources/enum-file-header.csv create mode 100644 tests/Resources/Anonymization/Pack/resources/enum-file-header.txt create mode 100644 tests/Resources/Anonymization/Pack/resources/enum-file.csv create mode 100644 tests/Resources/Anonymization/Pack/resources/enum-file.tsv create mode 100644 tests/Resources/Anonymization/Pack/resources/enum-file.txt create 
mode 100644 tests/Resources/Anonymization/Pack/resources/hexasmal.csv create mode 100644 tests/Unit/Helper/FileReaderTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 49c87257..e84a0c15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Next +* [feature] 🌟 File enum anonymizer, inject samples in database from a plain text or CSV file. * [feature] 🌟 String pattern anonymizer, build complex strings by fetching values from other anonymizers. * [internal] introduce anonymizer context for carrying environment configuration to anonymizers (#235). * [bc] Salt in `AbstractAnonymizer::$option->get('salt')` in now in `AbstractAnonymizer::$context->salt` (#235). diff --git a/docs/content/anonymization/core-anonymizers.md b/docs/content/anonymization/core-anonymizers.md index 383ca782..e79a9cc9 100644 --- a/docs/content/anonymization/core-anonymizers.md +++ b/docs/content/anonymization/core-anonymizers.md @@ -14,6 +14,7 @@ This page list all *Anonymizers* provided by *DbToolsBundle*. + diff --git a/docs/content/anonymization/core-anonymizers/address.md b/docs/content/anonymization/core-anonymizers/address.md index 1df99618..69395f95 100644 --- a/docs/content/anonymization/core-anonymizers/address.md +++ b/docs/content/anonymization/core-anonymizers/address.md @@ -95,6 +95,13 @@ customer: #... ``` ::: + +:::warning +This anonymizer works at the *table level* which means that the PHP attribute +cannot target object properties: you must specify table column names and not +PHP class property names. +::: + @@@ :::tip diff --git a/docs/content/anonymization/core-anonymizers/file-enum.md b/docs/content/anonymization/core-anonymizers/file-enum.md new file mode 100644 index 00000000..f9b51b00 --- /dev/null +++ b/docs/content/anonymization/core-anonymizers/file-enum.md @@ -0,0 +1,79 @@ +## File enum + +This anonymizer will fill configured column with a random value from a given sample fetched +from a plain text or a CSV file. 
+ +Given the following file: + +```txt +none +bad +good +expert +``` + +Then: + +@@@ standalone docker + +```yaml [YAML] +# db_tools.config.yaml +anonymization: + default: + customer: + level: + anonymizer: file_enum + options: {source: ./resources/levels.txt} + #... +``` + +@@@ +@@@ symfony + +::: code-group +```php [Attribute] +namespace App\Entity; + +use Doctrine\ORM\Mapping as ORM; +use MakinaCorpus\DbToolsBundle\Attribute\Anonymize; + +#[ORM\Entity()] +#[ORM\Table(name: 'customer')] +class Customer +{ + // ... + + #[ORM\Column(length: 255)] + #[Anonymize(type: 'file_enum', options: ['source' => "./resources/levels.txt"])] // [!code ++] + private ?string $level = null; + + // ... +} +``` + +```yaml [YAML] +# config/anonymization.yaml +customer: + level: + anonymizer: file_enum + options: {source: ./resources/levels.txt} +#... +``` +::: + +@@@ + +File will be read this way: + - When using a plain text file, each non-empty line is a value (surrounding whitespace is trimmed), no matter what's inside. + - When using a CSV file, the first column will be used instead. + +When parsing a file, you can set the following options as well: + - `file_csv_enclosure`: if file is a CSV, use this as the enclosure character (default is `'"'`). + - `file_csv_escape`: if file is a CSV, use this as the escape character (default is `'\\'`). + - `file_csv_separator`: if file is a CSV, use this as the separator character (default is `','`). + - `file_skip_header`: when reading any file, set this to true to skip the first line (default is `false`). + +:::tip +The filename can be absolute, or relative.
For relative file resolution +please see [*File name resolution*](#file-name-resolution) +::: diff --git a/docs/content/anonymization/core-anonymizers/iban-bic.md b/docs/content/anonymization/core-anonymizers/iban-bic.md index c15f2cc5..179df9da 100644 --- a/docs/content/anonymization/core-anonymizers/iban-bic.md +++ b/docs/content/anonymization/core-anonymizers/iban-bic.md @@ -74,4 +74,10 @@ customer: ``` ::: +:::warning +This anonymizer works at the *table level* which means that the PHP attribute +cannot target object properties: you must specify table column names and not +PHP class property names. +::: + @@@ diff --git a/src/Anonymization/AnonymizatorFactory.php b/src/Anonymization/AnonymizatorFactory.php index 079eb698..21cd1269 100644 --- a/src/Anonymization/AnonymizatorFactory.php +++ b/src/Anonymization/AnonymizatorFactory.php @@ -5,6 +5,7 @@ namespace MakinaCorpus\DbToolsBundle\Anonymization; use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AnonymizerRegistry; +use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context; use MakinaCorpus\DbToolsBundle\Anonymization\Config\AnonymizationConfig; use MakinaCorpus\DbToolsBundle\Anonymization\Config\Loader\LoaderInterface; use MakinaCorpus\DbToolsBundle\Database\DatabaseSessionRegistry; @@ -21,8 +22,24 @@ public function __construct( private DatabaseSessionRegistry $registry, private AnonymizerRegistry $anonymizerRegistry, private ?LoggerInterface $logger = null, + /** + * @todo + * This is not the right place to set this, but any other alternative + * would require a deep refactor of anonymizer options. + */ + private ?string $basePath = null, ) {} + /** + * @internal + * For Laravel dependency injection only. + * This can change anytime. + */ + public function setBasePath(?string $basePath): void + { + $this->basePath = $basePath; + } + /** * Add configuration loader. 
*/ @@ -49,7 +66,8 @@ public function getOrCreate(string $connectionName): Anonymizator $anonymizator = new Anonymizator( $this->registry->getDatabaseSession($connectionName), $this->anonymizerRegistry, - $config + $config, + new Context(basePath: $this->basePath), ); if ($this->logger) { diff --git a/src/Anonymization/Anonymizer/AnonymizerRegistry.php b/src/Anonymization/Anonymizer/AnonymizerRegistry.php index 86380b0e..7442e5c1 100644 --- a/src/Anonymization/Anonymizer/AnonymizerRegistry.php +++ b/src/Anonymization/Anonymizer/AnonymizerRegistry.php @@ -17,6 +17,7 @@ class AnonymizerRegistry Core\ConstantAnonymizer::class, Core\DateAnonymizer::class, Core\EmailAnonymizer::class, + Core\FileEnumAnonymizer::class, Core\FirstNameAnonymizer::class, Core\FloatAnonymizer::class, Core\IbanBicAnonymizer::class, diff --git a/src/Anonymization/Anonymizer/Context.php b/src/Anonymization/Anonymizer/Context.php index 7316fe6f..05fe82ba 100644 --- a/src/Anonymization/Anonymizer/Context.php +++ b/src/Anonymization/Anonymizer/Context.php @@ -7,11 +7,14 @@ class Context { public readonly string $salt; + public readonly string $basePath; public function __construct( ?string $salt = null, + ?string $basePath = null, ) { $this->salt = $salt ?? self::generateRandomSalt(); + $this->basePath = $basePath ?? 
\getcwd(); } public static function generateRandomSalt(): string diff --git a/src/Anonymization/Anonymizer/Core/FileEnumAnonymizer.php b/src/Anonymization/Anonymizer/Core/FileEnumAnonymizer.php new file mode 100644 index 00000000..99e63667 --- /dev/null +++ b/src/Anonymization/Anonymizer/Core/FileEnumAnonymizer.php @@ -0,0 +1,65 @@ +filename) { + return $this->filename; + } + + $filename = $this->options->getString('source', null, true); + $filename = FileReader::ensurePathAbsolute($filename, $this->context->basePath); + + FileReader::ensureFile($filename); + + return $this->filename = $filename; + } + + #[\Override] + protected function validateOptions(): void + { + parent::validateOptions(); + + $this->getFilename(); + } + + #[\Override] + protected function getSample(): array + { + return \iterator_to_array( + FileReader::readEnumFile( + $this->getFilename(), + $this->options, + ) + ); + } +} diff --git a/src/Bridge/Laravel/DbToolsServiceProvider.php b/src/Bridge/Laravel/DbToolsServiceProvider.php index 75b71b65..55471021 100644 --- a/src/Bridge/Laravel/DbToolsServiceProvider.php +++ b/src/Bridge/Laravel/DbToolsServiceProvider.php @@ -141,6 +141,8 @@ public function register(): void $this->app->resolving( AnonymizatorFactory::class, function (AnonymizatorFactory $factory, Application $app): void { + $factory->setBasePath($app->basePath()); + /** @var Repository $config */ $config = $app->make('config'); diff --git a/src/Bridge/Standalone/Bootstrap.php b/src/Bridge/Standalone/Bootstrap.php index 32b16932..43e9742f 100644 --- a/src/Bridge/Standalone/Bootstrap.php +++ b/src/Bridge/Standalone/Bootstrap.php @@ -234,7 +234,7 @@ public static function bootstrap(array $config = [], array $configFiles = [], ?L $databaseSessionRegistry = self::createDatabaseSessionRegistry($configRegistry); $anonymizerRegistry = self::createAnonymizerRegistry($config); - $anonymizatorFactory = new AnonymizatorFactory($databaseSessionRegistry, $anonymizerRegistry, $logger); + 
$anonymizatorFactory = new AnonymizatorFactory($databaseSessionRegistry, $anonymizerRegistry, $logger, $config['workdir']); foreach (($config['anonymization_files'] ?? []) as $connectionName => $file) { $anonymizatorFactory->addConfigurationLoader(new YamlLoader($file, $connectionName)); diff --git a/src/Bridge/Symfony/Resources/config/services.yaml b/src/Bridge/Symfony/Resources/config/services.yaml index ecad5cce..befdcc57 100644 --- a/src/Bridge/Symfony/Resources/config/services.yaml +++ b/src/Bridge/Symfony/Resources/config/services.yaml @@ -119,4 +119,5 @@ services: - '@db_tools.database_session.registry' - '@db_tools.anonymization.anonymizer.registry' - '@logger' + - "%kernel.project_dir%" tags: [{ name: monolog.logger, channel: db_tools_anonymization }] diff --git a/src/Helper/FileReader.php b/src/Helper/FileReader.php new file mode 100644 index 00000000..a0caa5f8 --- /dev/null +++ b/src/Helper/FileReader.php @@ -0,0 +1,211 @@ + + */ + public static function readEnumFile(string $filename, ?Options $options = null, ?string $anonymizerId = null): iterable + { + $ext = self::getFileExtension($filename); + + // no match() usage here because CSV cannot expressed as a single expression. + if (null === $ext || 'txt' === $ext) { + yield from self::readTxtFile($filename, $options, $anonymizerId); + } elseif ('csv' === $ext || 'tsv' === $ext) { + foreach (self::readCsvFile($filename, $options, $anonymizerId) as $line) { + \assert(\is_array($line)); + if ($line) { + yield $line[0]; + } + } + } elseif ($anonymizerId) { + throw new ConfigurationException(\sprintf("Anonymizer '%s': unsupported enum data file type: '%s'.", $anonymizerId, $ext)); + } else { + throw new ConfigurationException(\sprintf("Unsupported enum data file type: '%s'.", $ext)); + } + } + + /** + * Iterator on column file contents. 
+ * + * @return iterable> + */ + public static function readColumnFile(string $filename, ?Options $options = null, ?string $anonymizerId = null): iterable + { + $ext = self::getFileExtension($filename); + + // no match() usage here because CSV cannot be expressed as a single expression. + if ('csv' === $ext || 'tsv' === $ext) { + yield from self::readCsvFile($filename, $options, $anonymizerId); + } else { + throw new ConfigurationException("Unsupported column data file type."); + } + } + + /** + * Iterator on plain text file lines. + * + * @return iterable + */ + public static function readTxtFile(string $filename, ?Options $options = null, ?string $anonymizerId = null): iterable + { + self::ensureFile($filename, $anonymizerId); + + $options ??= new Options(); + + $handle = null; + try { + $handle = \fopen($filename, 'r'); + + if (false === $handle) { + if ($anonymizerId) { + throw new ConfigurationException(\sprintf("Anonymizer '%s' could not open file: %s", $anonymizerId, $filename)); + } else { + throw new ConfigurationException(\sprintf("Could not open file: %s", $filename)); + } + } + + $first = true; + while (false !== ($line = \fgets($handle))) { // Strict check: a last line holding only "0" (no EOL) is falsy. + $line = \trim($line); // Trim whitespaces (including end of line). + + if ($first) { + $first = false; + if ($options->getBool('file_skip_header', false)) { + continue; // Skip header. + } + } + + if ('' === $line) { // Not empty(): the value "0" is a legitimate sample. + continue; // Empty line, ignore. + } + + yield $line; + } + } finally { + if ($handle) { + @\fclose($handle); + } + } + } + + /** + * Iterator on CSV file contents.
+ * + * @return iterable> + */ + public static function readCsvFile(string $filename, ?Options $options = null, ?string $anonymizerId = null): iterable + { + self::ensureFile($filename, $anonymizerId); + + $options ??= new Options(); + + $handle = null; + try { + $handle = \fopen($filename, 'r'); + + if (false === $handle) { + if ($anonymizerId) { + throw new ConfigurationException(\sprintf("Anonymizer '%s' could not open file: %s", $anonymizerId, $filename)); + } else { + throw new ConfigurationException(\sprintf("Could not open file: %s", $filename)); + } + } + + $separator = $options->getString('file_csv_separator', ','); + $enclosure = $options->getString('file_csv_enclosure', '"'); + $escape = $options->getString('file_csv_escape', '\\'); + + $first = true; + while ($line = \fgetcsv($handle, null, $separator, $enclosure, $escape)) { + if ($first) { + $first = false; + if ($options->getBool('file_skip_header', false)) { + continue; // Skip header. + } + } + + if (!\array_filter($line, static fn (?string $value): bool => null !== $value && '' !== $value)) { // Only null/'' cells are blank, "0" is data. + continue; // Empty line, ignore.
+ } + + yield $line; + } + } finally { + if ($handle) { + @\fclose($handle); + } + } + } + + public static function ensureFile(string $filename, ?string $anonymizerId = null): void + { + if (!\file_exists($filename)) { + if ($anonymizerId) { + throw new ConfigurationException(\sprintf("Anonymizer '%s' uses a non existing file: %s", $anonymizerId, $filename)); + } else { + throw new ConfigurationException(\sprintf("Uses a non existing file: %s", $filename)); + } + } + if (!\is_file($filename)) { + if ($anonymizerId) { + throw new ConfigurationException(\sprintf("Anonymizer '%s' is not a regular file: %s", $anonymizerId, $filename)); + } else { + throw new ConfigurationException(\sprintf("Is not a regular file: %s", $filename)); + } + } + if (!\is_readable($filename)) { + if ($anonymizerId) { + throw new ConfigurationException(\sprintf("Anonymizer '%s' file cannot be read: %s", $anonymizerId, $filename)); + } else { + throw new ConfigurationException(\sprintf("File cannot be read: %s", $filename)); + } + } + } +} diff --git a/tests/Functional/Anonymizer/Core/FileEnumAnonymizerTest.php b/tests/Functional/Anonymizer/Core/FileEnumAnonymizerTest.php new file mode 100644 index 00000000..bfabcb45 --- /dev/null +++ b/tests/Functional/Anonymizer/Core/FileEnumAnonymizerTest.php @@ -0,0 +1,127 @@ +createOrReplaceTable( + 'table_test', + [ + 'id' => 'integer', + 'data' => 'string', + ], + [ + [ + 'id' => 1, + 'data' => 'test1', + ], + [ + 'id' => 2, + 'data' => 'test2', + ], + [ + 'id' => 3, + 'data' => 'test3', + ], + [ + 'id' => 4, + ], + ], + ); + } + + public function testAnonymize(): void + { + // File contents. + $sample = ['foo', 'a', '1']; + + $anonymizator = $this->createAnonymizatorWithConfig(new AnonymizerConfig( + 'table_test', + 'data', + 'file_enum', + new Options(['source' => \dirname(__DIR__, 3) . 
'/Resources/Anonymization/Pack/resources/enum-file.txt']) + )); + + self::assertSame( + 'test1', + $this->getDatabaseSession()->executeQuery('select data from table_test where id = 1')->fetchOne(), + ); + + $anonymizator->anonymize(); + + $datas = $this->getDatabaseSession()->executeQuery('select data from table_test order by id asc')->fetchFirstColumn(); + + $data = $datas[0]; + self::assertNotNull($data); + self::assertNotSame('test1', $data); + self::assertContains($data, $sample); + + $data = $datas[1]; + self::assertNotNull($data); + self::assertNotSame('test2', $data); + self::assertContains($data, $sample); + + $data = $datas[2]; + self::assertNotNull($data); + self::assertNotSame('test3', $data); + self::assertContains($data, $sample); + + self::assertNull($datas[3]); + + self::assertGreaterThan(1, \count(\array_unique($datas)), 'All generated values are different.'); // Compare the number of distinct values, not the array itself. + } + + public function testAnonymizeWithBasePath(): void + { + // File contents. + $sample = ['foo', 'a', '1']; + + $anonymizator = $this->createAnonymizatorWithConfig(new AnonymizerConfig( + 'table_test', + 'data', + 'file_enum', + new Options([ + // In tests, base path is '/var/www' because it's set to \getcwd() + // per default, which is docker workdir.
+ 'source' => './tests/Resources/Anonymization/Pack/resources/enum-file.txt', + ]), + )); + + self::assertSame( + 'test1', + $this->getDatabaseSession()->executeQuery('select data from table_test where id = 1')->fetchOne(), + ); + + $anonymizator->anonymize(); + + $datas = $this->getDatabaseSession()->executeQuery('select data from table_test order by id asc')->fetchFirstColumn(); + + $data = $datas[0]; + self::assertNotNull($data); + self::assertNotSame('test1', $data); + self::assertContains($data, $sample); + + $data = $datas[1]; + self::assertNotNull($data); + self::assertNotSame('test2', $data); + self::assertContains($data, $sample); + + $data = $datas[2]; + self::assertNotNull($data); + self::assertNotSame('test3', $data); + self::assertContains($data, $sample); + + self::assertNull($datas[3]); + + self::assertGreaterThan(1, \count(\array_unique($datas)), 'All generated values are different.'); // Compare the number of distinct values, not the array itself. + } +} diff --git a/tests/Resources/Anonymization/Pack/resources/enum-file-header.csv b/tests/Resources/Anonymization/Pack/resources/enum-file-header.csv new file mode 100644 index 00000000..8c96e363 --- /dev/null +++ b/tests/Resources/Anonymization/Pack/resources/enum-file-header.csv @@ -0,0 +1,3 @@ +First column,Second column,Third column +a,b,c +1,2,3 diff --git a/tests/Resources/Anonymization/Pack/resources/enum-file-header.txt b/tests/Resources/Anonymization/Pack/resources/enum-file-header.txt new file mode 100644 index 00000000..a483961b --- /dev/null +++ b/tests/Resources/Anonymization/Pack/resources/enum-file-header.txt @@ -0,0 +1,5 @@ +# Values +foo +a + +1 diff --git a/tests/Resources/Anonymization/Pack/resources/enum-file.csv b/tests/Resources/Anonymization/Pack/resources/enum-file.csv new file mode 100644 index 00000000..3ba66319 --- /dev/null +++ b/tests/Resources/Anonymization/Pack/resources/enum-file.csv @@ -0,0 +1,4 @@ +foo,bar,baz +a,b,c +1,2,3 +cat,dog,girafe \ No newline at end of file diff --git a/tests/Resources/Anonymization/Pack/resources/enum-file.tsv
b/tests/Resources/Anonymization/Pack/resources/enum-file.tsv new file mode 100644 index 00000000..b329fc1d --- /dev/null +++ b/tests/Resources/Anonymization/Pack/resources/enum-file.tsv @@ -0,0 +1,2 @@ +'foo'#bar#baz +cat#dog#'gi\#rafe' \ No newline at end of file diff --git a/tests/Resources/Anonymization/Pack/resources/enum-file.txt b/tests/Resources/Anonymization/Pack/resources/enum-file.txt new file mode 100644 index 00000000..15a1e4be --- /dev/null +++ b/tests/Resources/Anonymization/Pack/resources/enum-file.txt @@ -0,0 +1,4 @@ +foo +a + +1 diff --git a/tests/Resources/Anonymization/Pack/resources/hexasmal.csv b/tests/Resources/Anonymization/Pack/resources/hexasmal.csv new file mode 100644 index 00000000..7becd366 --- /dev/null +++ b/tests/Resources/Anonymization/Pack/resources/hexasmal.csv @@ -0,0 +1,19 @@ +#Code_commune_INSEE;Nom_de_la_commune;Code_postal;Libellé_d_acheminement;Ligne_5 +01001;L ABERGEMENT CLEMENCIAT;01400;L ABERGEMENT CLEMENCIAT; +01002;L ABERGEMENT DE VAREY;01640;L ABERGEMENT DE VAREY; +01004;AMBERIEU EN BUGEY;01500;AMBERIEU EN BUGEY; +01005;AMBERIEUX EN DOMBES;01330;AMBERIEUX EN DOMBES; +01006;AMBLEON;01300;AMBLEON; +01007;AMBRONAY;01500;AMBRONAY; +01008;AMBUTRIX;01500;AMBUTRIX; +01009;ANDERT ET CONDON;01300;ANDERT ET CONDON; +01010;ANGLEFORT;01350;ANGLEFORT; +01011;APREMONT;01100;APREMONT; +01012;ARANC;01110;ARANC; +01013;ARANDAS;01230;ARANDAS; +01014;ARBENT;01100;ARBENT; +01015;ARBOYS EN BUGEY;01300;ARBOYS EN BUGEY;ARBIGNIEU +01015;ARBOYS EN BUGEY;01300;ARBOYS EN BUGEY;ST BOIS +01016;ARBIGNY;01190;ARBIGNY; +01017;ARGIS;01230;ARGIS; +01019;ARMIX;01510;ARMIX; diff --git a/tests/Unit/Helper/FileReaderTest.php b/tests/Unit/Helper/FileReaderTest.php new file mode 100644 index 00000000..ee9866e3 --- /dev/null +++ b/tests/Unit/Helper/FileReaderTest.php @@ -0,0 +1,93 @@ +getDirectory('resources/enum-file.txt')); + + self::assertSame( + ['foo', 'a', '1'], + \iterator_to_array($data), + ); + } + + public function testReadEnumFileSkipHeader(): void 
+ { + $data = FileReader::readEnumFile($this->getDirectory('resources/enum-file-header.txt'), new Options([ + 'file_skip_header' => true, + ])); + + self::assertSame( + ['foo', 'a', '1'], + \iterator_to_array($data), + ); + } + + public function testReadEnumFileCsv(): void + { + $data = FileReader::readEnumFile($this->getDirectory('resources/enum-file.csv')); + + self::assertSame( + ['foo', 'a', '1', 'cat'], + \iterator_to_array($data), + ); + } + + public function testReadEnumFileCsvSkipHeader(): void + { + $data = FileReader::readEnumFile($this->getDirectory('resources/enum-file-header.csv'), new Options([ + 'file_skip_header' => true, + ])); + + self::assertSame( + ['a', '1'], + \iterator_to_array($data), + ); + } + + public function testReadColumnFileCsv(): void + { + $data = FileReader::readColumnFile($this->getDirectory('resources/enum-file.csv')); + + self::assertSame( + [ + ['foo', 'bar', 'baz'], + ['a', 'b', 'c'], + ['1', '2', '3'], + ['cat', 'dog', 'girafe'], + ], + \iterator_to_array($data), + ); + } + + public function testReadColumnFileTsv(): void + { + $data = FileReader::readColumnFile($this->getDirectory('resources/enum-file.tsv'), new Options([ + 'file_csv_enclosure' => "'", + 'file_csv_escape' => '\\', + 'file_csv_separator' => "#", + ])); + + self::assertSame( + [ + ['foo', 'bar', 'baz'], + ['cat', 'dog', 'gi\#rafe'], + ], + \iterator_to_array($data), + ); + } +}