44
55namespace Rechtlogisch \TseId ;
66
7+ use RuntimeException ;
78use Symfony \Component \BrowserKit \HttpBrowser ;
89use Symfony \Component \DomCrawler \Crawler ;
910use Symfony \Component \HttpClient \HttpClient ;
@@ -13,15 +14,18 @@ class Retrieve
1314{
/** Number of result pages to fetch; starts at 1. */
private int $countPages = 1;

/**
 * Listing URL of the BSI TSE table, sorted by title descending.
 *
 * The page number is appended directly to this value, so it must end with
 * the encoded "=" of the "gtp" parameter and must not carry trailing whitespace.
 */
public const URL = 'https://www.bsi.bund.de/EN/Themen/Unternehmen-und-Organisationen/Standards-und-Zertifizierung/Zertifizierung-und-Anerkennung/Listen/Zertifizierte-Produkte-nach-TR/Technische_Sicherheitseinrichtungen/TSE_node.html?gts=913608_list%253Dtitle_text_sort%252Bdesc&gtp=913608_list%253D';

/**
 * Retrieved table rows.
 *
 * @var array<string, array<string, string>>
 */
private array $retrieved = [];
2223
/** Browser used to issue the HTTP requests. */
private HttpBrowser $browser;

/**
 * Stores the browser to use and then calls run().
 *
 * @param HttpBrowser|null $browser Browser to use for requests; when omitted, one is
 *                                  created on top of HttpClient::create().
 */
public function __construct(?HttpBrowser $browser = null)
{
    if ($browser === null) {
        $browser = new HttpBrowser(HttpClient::create());
    }

    $this->browser = $browser;
    $this->run();
}
2731
/**
 * Requests one page of the listing and records every table row it contains.
 *
 * On page 1 the pagination text is parsed for the total number of results and
 * the page count is derived from it (divided by 10 and rounded up). Each table
 * row is stored in $this->retrieved under its id with the "BSI-K-TR-" prefix
 * removed.
 *
 * @param int $no Page number appended to self::URL.
 *
 * @throws RetrieveException When requesting or parsing the page fails; the exception
 *                           carries the URL and, if available, the received HTML.
 */
public function page(int $no = 1): void
{
    $url = self::URL.$no;

    try {
        $crawler = $this->browser->request('GET', $url);

        if ($no === 1) {
            $paginationText = $crawler->filter('#content nav.c-pagination p')->text();
            preg_match('/Search results (\d+) to (\d+) from a total of (\d+)/', $paginationText, $matches);
            // Falls back to zero results when the pagination text does not match.
            $max = $matches[3] ?? '0';

            $this->countPages = (int) ceil((int) $max / 10);
        }

        $crawler->filter('#content div.wrapperTable table.textualData tbody tr')->each(function (Crawler $row) {
            $rowData = [];
            $tseId = null;

            $row->filter('td')->each(function (Crawler $cell, int $index) use (&$rowData, &$tseId) {
                $header = '';

                // Column position decides which field the cell belongs to.
                switch ($index) {
                    case 0:
                        $header = 'tse_id';
                        break;
                    case 1:
                        $header = 'content';
                        break;
                    case 2:
                        $header = 'manufacturer';
                        break;
                    case 3:
                        $header = 'date_issuance';
                        break;
                }

                if ($header === 'tse_id') {
                    // The first cell holds "BSI-K-TR-<id>-<year>"; split it into its parts.
                    $fullIdText = $cell->text();
                    $tseId = str_replace('BSI-K-TR-', '', $fullIdText);
                    [$id, $year] = explode('-', $tseId);
                    $rowData['id'] = $id;
                    $rowData['year'] = $year;
                } else {
                    $rowData[$header] = trim($cell->text());
                }
            });

            $this->retrieved[$tseId] = $rowData;
        });
    } catch (\Throwable $e) {
        // Fully-qualified \Throwable: an unqualified name would be resolved inside
        // this namespace and the catch would never match.

        // Reading the body is best-effort. If the request failed before a response
        // was received there is nothing to read, and that secondary failure must
        // not hide the original error.
        try {
            $html = $this->browser->getResponse()->getContent();
        } catch (\Throwable $responseError) {
            $html = '';
        }

        // Chain the caught exception itself so its type and stack trace are kept.
        throw (new RetrieveException($e->getMessage(), $e->getCode(), $e))
            ->addContext($url, $html);
    }
}
8796
8897 /**
@@ -112,13 +121,13 @@ public function json(?string $tseId = null, bool $pretty = true): string
/**
 * Saves the retrieved data to a JSON file whose name starts with today's date.
 *
 * @param string $path Directory the file is written to; defaults to the current directory.
 *
 * @return array<string, string> Map with the key 'json' holding the value returned by saveJson().
 */
public function save(string $path = '.'): array
{
    // File name without extension: <path>/<Y-m-d>
    $target = $path.DIRECTORY_SEPARATOR.date('Y-m-d');

    return ['json' => $this->saveJson($target)];
}
@@ -127,16 +136,21 @@ private function saveJson(string $path): string
127136 {
128137 $ content = $ this ->json ();
129138 $ pathWithExtension = $ path .'.json ' ;
139+ $ dir = dirname ($ pathWithExtension );
140+ if (! is_dir ($ dir ) || ! is_writable ($ dir )) {
141+ return '' ;
142+ }
143+
130144 $ result = file_put_contents ($ pathWithExtension , $ content );
131145
132146 if ($ result === false ) {
133- return '' ;
147+ throw new RuntimeException ( ' Could not write file: ' . $ pathWithExtension ) ;
134148 }
135149
136150 $ path = realpath ($ pathWithExtension );
137151
138152 if ($ path === false ) {
139- return '' ;
153+ throw new RuntimeException ( ' Could not determine real path: ' . $ pathWithExtension ) ;
140154 }
141155
142156 return $ path ;
0 commit comments