Skip to content

Commit 748a5ab

Browse files
authored
Add a new HTMLElementEntry & related stuff (#1972)
* Add a new `HTMLElementEntry` & related stuff
* Adjust DOM-related functions to work with HTML
* Adjust string type casting with HTML data
1 parent 49b5a02 commit 748a5ab

File tree

37 files changed

+821
-122
lines changed

37 files changed

+821
-122
lines changed

phpstan.neon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ parameters:
8787

8888
ignoreErrors:
8989
-
90-
message: '#Dom\\(HTMLDocument|Element)#i'
90+
message: '#Dom\\(HTMLDocument|HTMLElement|Element)#i'
9191
identifier: class.notFound
9292

9393
includes:

rector.tests.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
use Flow\ETL\FlowContext;
2828
use Flow\Types\Type\Logical\DateTimeType;
2929
use Flow\Types\Type\Logical\DateType;
30+
use Flow\Types\Type\Logical\HTMLElementType;
3031
use Flow\Types\Type\Logical\HTMLType;
3132
use Flow\Types\Type\Logical\JsonType;
3233
use Flow\Types\Type\Logical\ListType;
@@ -178,6 +179,7 @@
178179
new NewObjectToFunction(XMLElementType::class, 'Flow\ETL\DSL\type_xml_element'),
179180
new NewObjectToFunction(XMLType::class, 'Flow\ETL\DSL\type_xml'),
180181
new NewObjectToFunction(HTMLType::class, 'Flow\ETL\DSL\type_html'),
182+
new NewObjectToFunction(HTMLElementType::class, 'Flow\ETL\DSL\type_html_element'),
181183

182184
// Extractors
183185
new NewObjectToFunction(CacheExtractor::class, 'from_cache'),

src/adapter/etl-adapter-doctrine/src/Flow/ETL/Adapter/Doctrine/SchemaConverter.php

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,43 +6,14 @@
66

77
use function Flow\Types\DSL\type_string;
88
use Doctrine\DBAL\Schema\{Column, Index, Table};
9-
use Doctrine\DBAL\Types\{DateImmutableType, DateTimeImmutableType, GuidType, TimeImmutableType};
109
use Doctrine\DBAL\Types\Type as DbalType;
1110
use Flow\ETL\Exception\InvalidArgumentException;
1211
use Flow\ETL\Schema;
1312
use Flow\ETL\Schema\{Definition, Metadata};
1413
use Flow\Types\Type;
15-
use Flow\Types\Type\Logical\{DateTimeType,
16-
DateType,
17-
JsonType,
18-
ListType,
19-
MapType,
20-
StructureType,
21-
TimeType,
22-
UuidType,
23-
XMLElementType,
24-
XMLType};
25-
use Flow\Types\Type\Native\{BooleanType, FloatType, IntegerType, StringType};
2614

2715
final readonly class SchemaConverter
2816
{
29-
public const DEFAULT_TYPES = [
30-
StringType::class => \Doctrine\DBAL\Types\StringType::class,
31-
IntegerType::class => \Doctrine\DBAL\Types\IntegerType::class,
32-
FloatType::class => \Doctrine\DBAL\Types\FloatType::class,
33-
BooleanType::class => \Doctrine\DBAL\Types\BooleanType::class,
34-
DateType::class => DateImmutableType::class,
35-
TimeType::class => TimeImmutableType::class,
36-
DateTimeType::class => DateTimeImmutableType::class,
37-
UuidType::class => GuidType::class,
38-
JsonType::class => \Doctrine\DBAL\Types\JsonType::class,
39-
XMLType::class => \Doctrine\DBAL\Types\StringType::class,
40-
XMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
41-
ListType::class => \Doctrine\DBAL\Types\JsonType::class,
42-
MapType::class => \Doctrine\DBAL\Types\JsonType::class,
43-
StructureType::class => \Doctrine\DBAL\Types\JsonType::class,
44-
];
45-
4617
private TypesMap $typesMap;
4718

4819
/**

src/adapter/etl-adapter-doctrine/src/Flow/ETL/Adapter/Doctrine/TypesMap.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
use Flow\Types\Type as FlowType;
1111
use Flow\Types\Type\Logical\{DateTimeType,
1212
DateType,
13+
HTMLElementType,
14+
HTMLType,
1315
JsonType,
1416
ListType,
1517
MapType,
@@ -63,6 +65,8 @@ final class TypesMap
6365
JsonType::class => \Doctrine\DBAL\Types\JsonType::class,
6466
XMLType::class => \Doctrine\DBAL\Types\StringType::class,
6567
XMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
68+
HTMLType::class => \Doctrine\DBAL\Types\StringType::class,
69+
HTMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
6670
ListType::class => \Doctrine\DBAL\Types\JsonType::class,
6771
MapType::class => \Doctrine\DBAL\Types\JsonType::class,
6872
StructureType::class => \Doctrine\DBAL\Types\JsonType::class,

src/adapter/etl-adapter-doctrine/tests/Flow/ETL/Adapter/Doctrine/Tests/Unit/TypesMapTest.php

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,18 @@
88
use Doctrine\DBAL\Types\DateTimeTzType;
99
use Flow\ETL\Adapter\Doctrine\TypesMap;
1010
use Flow\ETL\Exception\InvalidArgumentException;
11-
use Flow\Types\Type\Logical\{DateTimeType, DateType, JsonType, ListType, MapType, StructureType, TimeType, UuidType, XMLElementType, XMLType};
11+
use Flow\Types\Type\Logical\{DateTimeType,
12+
DateType,
13+
HTMLElementType,
14+
HTMLType,
15+
JsonType,
16+
ListType,
17+
MapType,
18+
StructureType,
19+
TimeType,
20+
UuidType,
21+
XMLElementType,
22+
XMLType};
1223
use Flow\Types\Type\Native\{BooleanType, FloatType, IntegerType, StringType};
1324
use PHPUnit\Framework\TestCase;
1425

@@ -175,6 +186,8 @@ public function test_default_flow_types_constant_mapping() : void
175186
JsonType::class => DbalJsonType::class,
176187
XMLType::class => \Doctrine\DBAL\Types\StringType::class,
177188
XMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
189+
HTMLType::class => \Doctrine\DBAL\Types\StringType::class,
190+
HTMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
178191
ListType::class => DbalJsonType::class,
179192
MapType::class => DbalJsonType::class,
180193
StructureType::class => DbalJsonType::class,

src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/SchemaConverter.php

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
use Flow\Types\Type;
2626
use Flow\Types\Type\Logical\{DateTimeType,
2727
DateType,
28+
HTMLType,
2829
JsonType,
2930
ListType,
3031
MapType,
@@ -34,6 +35,7 @@
3435
UuidType,
3536
XMLElementType,
3637
XMLType};
38+
use Flow\Types\Type\Logical\HTMLElementType;
3739
use Flow\Types\Type\Native\{BooleanType, FloatType, IntegerType, StringType};
3840

3941
final class SchemaConverter
@@ -77,6 +79,10 @@ private function flowToParquet(string $name, Type $type, bool $nullable) : Colum
7779
return FlatColumn::float($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
7880
case IntegerType::class:
7981
return FlatColumn::int64($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
82+
case HTMLType::class:
83+
case HTMLElementType::class:
84+
case XMLElementType::class:
85+
case XMLType::class:
8086
case StringType::class:
8187
return FlatColumn::string($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
8288
case BooleanType::class:
@@ -91,9 +97,6 @@ private function flowToParquet(string $name, Type $type, bool $nullable) : Colum
9197
return FlatColumn::uuid($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
9298
case JsonType::class:
9399
return FlatColumn::json($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
94-
case XMLType::class:
95-
case XMLElementType::class:
96-
return FlatColumn::string($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
97100
case ListType::class:
98101
$elementType = $type->element();
99102
$elementOptional = $elementType instanceof OptionalType;

src/bridge/openapi/specification/src/Flow/Bridge/OpenAPI/Specification/OpenAPIConverter.php

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,18 @@
99
use Flow\ETL\Schema;
1010
use Flow\ETL\Schema\{Definition, Metadata};
1111
use Flow\Types\Type;
12-
use Flow\Types\Type\Logical\{DateTimeType, DateType, JsonType, ListType, MapType, StructureType, TimeType, UuidType, XMLElementType, XMLType};
12+
use Flow\Types\Type\Logical\{DateTimeType,
13+
DateType,
14+
HTMLElementType,
15+
HTMLType,
16+
JsonType,
17+
ListType,
18+
MapType,
19+
StructureType,
20+
TimeType,
21+
UuidType,
22+
XMLElementType,
23+
XMLType};
1324
use Flow\Types\Type\Native\{ArrayType, BooleanType, EnumType, FloatType, IntegerType, StringType};
1425

1526
/**
@@ -423,13 +434,15 @@ private function convertTypeToOpenAPI(Type $type) : array
423434
BooleanType::class => ['type' => 'boolean'],
424435
IntegerType::class => ['type' => 'integer'],
425436
FloatType::class => ['type' => 'number'],
426-
StringType::class => ['type' => 'string'],
437+
StringType::class,
438+
HTMLType::class,
439+
HTMLElementType::class => ['type' => 'string'],
427440
DateType::class => ['type' => 'string', 'format' => 'date'],
428441
DateTimeType::class => ['type' => 'string', 'format' => 'date-time'],
429442
TimeType::class => ['type' => 'string', 'format' => 'time'],
430443
UuidType::class => ['type' => 'string', 'format' => 'uuid'],
431444
JsonType::class => ['type' => 'string', 'format' => 'json'],
432-
XMLType::class => ['type' => 'string', 'format' => 'xml'],
445+
XMLType::class,
433446
XMLElementType::class => ['type' => 'string', 'format' => 'xml'],
434447
EnumType::class => $this->convertEnumToOpenAPI($type),
435448
ArrayType::class => $this->convertArrayToOpenAPI($type),

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
type_xml_element as type_xml_element_new,
3838
types as types_new
3939
};
40-
use Dom\HTMLDocument;
40+
use Dom\{HTMLDocument, HTMLElement};
4141
use Flow\Calculator\Rounding;
4242
use Flow\ETL\{Analyze,
4343
Attribute\DocumentationDSL,
@@ -165,7 +165,22 @@
165165
use Flow\ETL\Retry\DelayFactory\{Fixed, Fixed\FixedMilliseconds};
166166
use Flow\ETL\Retry\RetryStrategy\{AnyThrowable, OnExceptionTypes};
167167
use Flow\ETL\Row\{Entries, EntryFactory, SortOrder};
168-
use Flow\ETL\Row\Entry\{BooleanEntry, DateEntry, DateTimeEntry, EnumEntry, FloatEntry, IntegerEntry, JsonEntry, ListEntry, MapEntry, StringEntry, StructureEntry, TimeEntry, UuidEntry, XMLElementEntry, XMLEntry};
168+
use Flow\ETL\Row\Entry\{BooleanEntry,
169+
DateEntry,
170+
DateTimeEntry,
171+
EnumEntry,
172+
FloatEntry,
173+
HTMLElementEntry,
174+
IntegerEntry,
175+
JsonEntry,
176+
ListEntry,
177+
MapEntry,
178+
StringEntry,
179+
StructureEntry,
180+
TimeEntry,
181+
UuidEntry,
182+
XMLElementEntry,
183+
XMLEntry};
169184
use Flow\ETL\Row\Entry\HTMLEntry;
170185
use Flow\ETL\Row\{Entry, EntryReference, Reference, References};
171186
use Flow\ETL\Row\Formatter\ASCIISchemaFormatter;
@@ -637,6 +652,15 @@ function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $me
637652
return new HTMLEntry($name, $value, $metadata);
638653
}
639654

655+
/**
656+
* @return Entry<?HTMLElement>
657+
*/
658+
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
659+
function html_element_entry(string $name, HTMLElement|string|null $value, ?Metadata $metadata = null) : Entry
660+
{
661+
return new HTMLElementEntry($name, $value, $metadata);
662+
}
663+
640664
/**
641665
* @param Entry<mixed> ...$entries
642666
*/
@@ -1978,6 +2002,15 @@ function html_schema(string $name, bool $nullable = false, ?Metadata $metadata =
19782002
return Definition::html($name, $nullable, $metadata);
19792003
}
19802004

2005+
/**
2006+
* @return Definition<HTMLElement>
2007+
*/
2008+
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
2009+
function html_element_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition
2010+
{
2011+
return Definition::html_element($name, $nullable, $metadata);
2012+
}
2013+
19812014
/**
19822015
* @return Definition<\DOMDocument>
19832016
*/

src/core/etl/src/Flow/ETL/Function/DOMElementAttributeValue.php

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,33 @@
55
namespace Flow\ETL\Function;
66

77
use function Flow\Types\DSL\{type_instance_of, type_list};
8+
use Dom\HTMLElement;
89
use Flow\ETL\Row;
910

1011
final class DOMElementAttributeValue extends ScalarFunctionChain
1112
{
1213
public function __construct(
13-
private readonly ScalarFunction|\DOMNode $domElement,
14+
private readonly ScalarFunction|\DOMNode|HTMLElement $domElement,
1415
private readonly ScalarFunction|string $attribute,
1516
) {
1617
}
1718

1819
public function eval(Row $row) : ?string
1920
{
20-
$node = (new Parameter($this->domElement))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
21+
$types = [
22+
type_instance_of(\DOMNode::class),
23+
type_list(type_instance_of(\DOMNode::class)),
24+
];
25+
26+
if (\class_exists('\Dom\HTMLElement')) {
27+
$types[] = type_instance_of(HTMLElement::class);
28+
$types[] = type_list(type_instance_of(HTMLElement::class));
29+
}
30+
31+
$node = (new Parameter($this->domElement))->as(
32+
$row,
33+
...$types
34+
);
2135

2236
if ($node instanceof \DOMDocument) {
2337
$node = $node->documentElement;
@@ -33,13 +47,11 @@ public function eval(Row $row) : ?string
3347
return null;
3448
}
3549

36-
if (!$node instanceof \DOMNode || !$node->hasAttributes()) {
50+
if ((!$node instanceof \DOMNode && !$node instanceof HTMLElement) || !$node->hasAttributes()) {
3751
return null;
3852
}
3953

40-
$attributes = $node->attributes;
41-
42-
if (!$namedItem = $attributes->getNamedItem($attributeName)) {
54+
if (!$namedItem = $node->attributes->getNamedItem($attributeName)) {
4355
return null;
4456
}
4557

src/core/etl/src/Flow/ETL/Function/DOMElementAttributesCount.php

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,27 @@
44

55
namespace Flow\ETL\Function;
66

7+
use function Flow\Types\DSL\type_instance_of;
8+
use Dom\HTMLElement;
79
use Flow\ETL\Row;
810

911
final class DOMElementAttributesCount extends ScalarFunctionChain
1012
{
11-
public function __construct(private readonly ScalarFunction|\DOMNode $domElement)
13+
public function __construct(private readonly ScalarFunction|\DOMNode|HTMLElement $domElement)
1214
{
1315
}
1416

1517
public function eval(Row $row) : ?int
1618
{
17-
$domElement = (new Parameter($this->domElement))->asInstanceOf($row, \DOMElement::class);
19+
$types = [
20+
type_instance_of(\DOMElement::class),
21+
];
22+
23+
if (\class_exists('\Dom\HTMLElement')) {
24+
$types[] = type_instance_of(HTMLElement::class);
25+
}
26+
27+
$domElement = (new Parameter($this->domElement))->as($row, ...$types);
1828

1929
if ($domElement === null) {
2030
return null;

0 commit comments

Comments
 (0)