Skip to content

Commit 995ca19

Browse files
committed
[FEATURE] Get encoding from XML source, resolves #2
1 parent 4419224 commit 995ca19

File tree

5 files changed

+32
-14
lines changed

5 files changed

+32
-14
lines changed

ChangeLog

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
2026-03-13 Francois Suter (Idéative) <typo3@ideative.ch>
22

33
* Add new "requestOptions" parameter, resolves #14
4+
* Get encoding from XML source, resolves #2
45

56
2026-02-02 Francois Suter (Idéative) <typo3@ideative.ch>
67

Classes/Service/ConnectorFeed.php

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,20 @@ protected function query(): mixed
183183
);
184184
}
185185
// Check if the current charset is the same as the file encoding
186-
// Don't do the check if no encoding was defined
187-
if (empty($this->parameters['encoding'])) {
188-
$encoding = null;
189-
$isSameCharset = true;
190-
} else {
186+
// Use explicitly defined encoding first, then try getting encoding from XML declaration
187+
if (!empty($this->parameters['encoding'])) {
191188
$encoding = $this->parameters['encoding'];
192189
$isSameCharset = $this->getCharset() === $encoding;
190+
} else {
191+
$dom = new \DOMDocument();
192+
$dom->loadXML($data, LIBXML_PARSEHUGE);
193+
if (!empty($dom->xmlEncoding)) {
194+
$encoding = $dom->xmlEncoding;
195+
$isSameCharset = $this->getCharset() === $encoding;
196+
} else {
197+
$encoding = null;
198+
$isSameCharset = true;
199+
}
193200
}
194201
// If the charset is not the same, convert data
195202
if (!$isSameCharset) {

Documentation/Configuration/Index.rst

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,6 @@ Description
117117
the encoding values recognized by the PHP library "mbstring". See
118118
https://www.php.net/manual/en/mbstring.supported-encodings.php
119119

120-
.. warning::
121-
122-
If you are aiming for the array format (i.e. calling
123-
:code:`fetchArray()`), you should not define this property. Indeed the
124-
:code:`\Cobweb\Svconnector\Utility\ConnectorUtility::convertXmlToArray()`
125-
which is used in this case relies on the SimpleXML library, which
126-
already takes care of the encoding conversion. To avoid a double
127-
encoding just ignore this property.
120+
It is not necessary to define this parameter if the XML source contains an
121+
encoding in its declaration. It will automatically be retrieved. However,
122+
if this parameter is defined, it supersedes the encoding from the XML declaration.

Documentation/Installation/Index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ It makes it possible to use any of the request options supported by Guzzle HTTP.
2929
The "headers" parameter has been deprecated. Headers should be passed as part of the
3030
"requestOptions" instead.
3131

32+
Encoding is now guessed from the declaration of the XML source, if defined.
33+
3234

3335
.. _installation-update-500:
3436

Tests/Functional/ConnectorFeedTest.php

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ public static function sourceDataProvider(): array
7171
EOT
7272

7373
],
74-
'ISO-8859-1 data' => [
74+
'ISO-8859-1 data with explicit encoding' => [
7575
'parameters' => [
7676
'uri' => 'EXT:svconnector_feed/Tests/Functional/Fixtures/data_latin1.xml',
7777
'encoding' => 'iso-8859-1',
@@ -83,6 +83,19 @@ public static function sourceDataProvider(): array
8383
<name>Porte interdùm lacîna c'est euismod.</name>
8484
</item>
8585
</items>
86+
EOT
87+
],
88+
'ISO-8859-1 data with implicit encoding' => [
89+
'parameters' => [
90+
'uri' => 'EXT:svconnector_feed/Tests/Functional/Fixtures/data_latin1.xml',
91+
],
92+
'result' => <<<EOT
93+
<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
94+
<items>
95+
<item>
96+
<name>Porte interdùm lacîna c'est euismod.</name>
97+
</item>
98+
</items>
8699
EOT
87100
],
88101
];

0 commit comments

Comments
 (0)