1+ <?php
2+ /*
3+ * This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
4+ *
5+ * Copyright (C) 2019 - 2025 Jan Böhmer (https://github.com/jbtronics)
6+ *
7+ * This program is free software: you can redistribute it and/or modify
8+ * it under the terms of the GNU Affero General Public License as published
9+ * by the Free Software Foundation, either version 3 of the License, or
10+ * (at your option) any later version.
11+ *
12+ * This program is distributed in the hope that it will be useful,
13+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+ * GNU Affero General Public License for more details.
16+ *
17+ * You should have received a copy of the GNU Affero General Public License
18+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
19+ */
20+
21+ declare (strict_types=1 );
22+
23+
24+ namespace App \Services \InfoProviderSystem \Providers ;
25+
26+ use App \Entity \Parts \ManufacturingStatus ;
27+ use App \Entity \Parts \Part ;
28+ use App \Services \InfoProviderSystem \DTOs \FileDTO ;
29+ use App \Services \InfoProviderSystem \DTOs \ParameterDTO ;
30+ use App \Services \InfoProviderSystem \DTOs \PartDetailDTO ;
31+ use App \Services \InfoProviderSystem \DTOs \PriceDTO ;
32+ use App \Services \InfoProviderSystem \DTOs \PurchaseInfoDTO ;
33+ use App \Services \InfoProviderSystem \DTOs \SearchResultDTO ;
34+ use Symfony \Component \DependencyInjection \Attribute \Autowire ;
35+ use Symfony \Component \DomCrawler \Crawler ;
36+ use Symfony \Contracts \HttpClient \HttpClientInterface ;
37+
/**
 * Info provider that retrieves part information by scraping the pollin.de web shop.
 *
 * Searches are performed against the shop's search endpoint and the returned HTML is
 * parsed with the Symfony DomCrawler.
 */
class PollinProvider implements InfoProviderInterface
{
    /** Search endpoint of the shop, used for keyword searches and for resolving product ids. */
    private const SEARCH_URL = 'https://www.pollin.de/search';

    /**
     * @param HttpClientInterface $client  HTTP client used to request the shop pages
     * @param bool                $enabled Whether this provider is active (wired from PROVIDER_POLLIN_ENABLED)
     */
    public function __construct(
        private readonly HttpClientInterface $client,
        #[Autowire(env: 'bool:PROVIDER_POLLIN_ENABLED')]
        private readonly bool $enabled = true,
    ) {
    }

    /**
     * Returns the static metadata describing this provider.
     *
     * @return array<string, string>
     */
    public function getProviderInfo(): array
    {
        return [
            'name' => 'Pollin',
            'description' => 'Webscrapping from pollin.de to get part information',
            //Must point to the shop this provider actually scrapes
            'url' => 'https://www.pollin.de/',
            'disabled_help' => 'Set PROVIDER_POLLIN_ENABLED env to 1',
        ];
    }

    /**
     * Returns the unique key identifying this provider.
     */
    public function getProviderKey(): string
    {
        return 'pollin';
    }

    /**
     * Returns whether this provider is enabled.
     */
    public function isActive(): bool
    {
        return $this->enabled;
    }

    /**
     * Searches the shop for the given keyword and maps every product box of the result page
     * to a search result.
     *
     * @return SearchResultDTO[]
     */
    public function searchByKeyword(string $keyword): array
    {
        $response = $this->client->request('GET', self::SEARCH_URL, [
            'query' => [
                'search' => $keyword,
            ],
        ]);

        $dom = new Crawler($response->getContent());

        //each() collects the return value of the closure for every div.product-box
        return $dom->filter('div.product-box')->each(
            fn (Crawler $node): SearchResultDTO => new SearchResultDTO(
                provider_key: $this->getProviderKey(),
                provider_id: $node->filter('meta[itemprop="productID"]')->attr('content'),
                name: $node->filter('a.product-name')->text(),
                description: '',
                preview_image_url: $node->filter('img.product-image')->attr('src'),
                manufacturing_status: $this->mapAvailability(
                    $this->optionalAttr($node->filter('link[itemprop="availability"]'), 'href')
                ),
                provider_url: $node->filter('a.product-name')->attr('href'),
            )
        );
    }

    /**
     * Maps a schema.org availability URI to a manufacturing status.
     *
     * Both the http:// and https:// spelling of the schema.org URIs are accepted.
     * Unknown or missing values map to NOT_SET.
     */
    private function mapAvailability(?string $availabilityURI): ManufacturingStatus
    {
        return match ($availabilityURI) {
            'http://schema.org/InStock', 'https://schema.org/InStock' => ManufacturingStatus::ACTIVE,
            'http://schema.org/OutOfStock', 'https://schema.org/OutOfStock' => ManufacturingStatus::DISCONTINUED,
            default => ManufacturingStatus::NOT_SET,
        };
    }

    /**
     * Reads an attribute from the first node of the selection, or returns null if the selection is empty.
     *
     * Crawler::attr() throws on an empty node list, so optional elements must be guarded.
     */
    private function optionalAttr(Crawler $nodes, string $attribute): ?string
    {
        return $nodes->count() > 0 ? $nodes->attr($attribute) : null;
    }

    /**
     * Retrieves the details of the product with the given (numeric) id.
     *
     * The id is passed to the shop search, which is expected to redirect to the product page.
     *
     * @throws \InvalidArgumentException if the id is not numeric
     * @throws \RuntimeException         if the search did not redirect to a product page
     */
    public function getDetails(string $id): PartDetailDTO
    {
        //Ensure that $id is numeric
        if (!is_numeric($id)) {
            throw new \InvalidArgumentException("The id must be numeric!");
        }

        $response = $this->client->request('GET', self::SEARCH_URL, [
            'query' => [
                'search' => $id,
            ],
        ]);

        //getInfo() is non-blocking; fetch the content first so that the redirect info is complete
        $content = $response->getContent();

        //The response must have us redirected to the product page
        if ((int) $response->getInfo('redirect_count') === 0) {
            throw new \RuntimeException("Could not resolve the product page for the given id!");
        }

        return $this->parseProductPage($content);
    }

    /**
     * Parses the HTML of a product page into a part detail DTO.
     */
    private function parseProductPage(string $content): PartDetailDTO
    {
        $dom = new Crawler($content);

        $productPageUrl = $dom->filter('meta[property="product:product_link"]')->attr('content');
        $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here

        //The weight meta content carries a "kg" suffix; strip it and convert the value to grams
        $massStr = $dom->filter('meta[itemprop="weight"]')->attr('content');
        $massStr = str_replace('kg', '', $massStr);
        $mass = (float) $massStr * 1000;

        //Parse purchase info (a single price for a quantity of 1)
        $purchaseInfo = new PurchaseInfoDTO(
            'Pollin',
            $orderId,
            [
                new PriceDTO(
                    1,
                    $dom->filter('meta[property="product:price:amount"]')->attr('content'),
                    $dom->filter('meta[property="product:price:currency"]')->attr('content')
                ),
            ],
            $productPageUrl
        );

        return new PartDetailDTO(
            provider_key: $this->getProviderKey(),
            provider_id: $orderId,
            name: trim($dom->filter('meta[property="og:title"]')->attr('content')),
            description: $dom->filter('meta[property="og:description"]')->attr('content'),
            category: $this->parseCategory($dom),
            //The brand meta tag is optional
            manufacturer: $this->optionalAttr($dom->filter('meta[property="product:brand"]'), 'content'),
            preview_image_url: $dom->filter('meta[property="og:image"]')->attr('content'),
            manufacturing_status: $this->mapAvailability(
                $this->optionalAttr($dom->filter('link[itemprop="availability"]'), 'href')
            ),
            provider_url: $productPageUrl,
            notes: $this->parseNotes($dom),
            datasheets: $this->parseDatasheets($dom),
            parameters: $this->parseParameters($dom),
            vendor_infos: [$purchaseInfo],
            mass: $mass,
        );
    }

    /**
     * Collects the download links of the product page as file DTOs.
     *
     * @return FileDTO[]
     */
    private function parseDatasheets(Crawler $dom): array
    {
        //One FileDTO for each a element within div.pol-product-detail-download-files
        return $dom->filter('div.pol-product-detail-download-files a')->each(
            static fn (Crawler $node): FileDTO => new FileDTO($node->attr('href'), $node->text())
        );
    }

    /**
     * Parses the rows of the properties table into parameter DTOs.
     *
     * @return ParameterDTO[]
     */
    private function parseParameters(Crawler $dom): array
    {
        //One parameter for each tr.properties-row inside table.product-detail-properties-table
        return $dom->filter('table.product-detail-properties-table tr.properties-row')->each(
            static fn (Crawler $node): ParameterDTO => ParameterDTO::parseValueField(
                //Labels end with a colon, which is not part of the name
                name: rtrim($node->filter('th.properties-label')->text(), ':'),
                value: $node->filter('td.properties-value')->text()
            )
        );
    }

    /**
     * Builds the category path from the breadcrumb, with the levels joined by ' -> '.
     *
     * The home breadcrumb entry is skipped. Returns an empty string if there is no breadcrumb.
     */
    private function parseCategory(Crawler $dom): string
    {
        //Map each li.breadcrumb-item inside ol.breadcrumb to its text, or null for the home entry
        $levels = $dom->filter('ol.breadcrumb li.breadcrumb-item')->each(
            static fn (Crawler $node): ?string => str_contains((string) $node->attr('class'), 'breadcrumb-item-home')
                ? null
                : $node->text()
        );

        return implode(
            ' -> ',
            array_filter($levels, static fn (?string $level): bool => $level !== null)
        );
    }

    /**
     * Concatenates the product highlights and the product description (both as HTML) into the notes.
     *
     * A missing section contributes an empty string instead of raising an exception.
     */
    private function parseNotes(Crawler $dom): string
    {
        $highlights = $dom->filter('div.product-detail-top-features')->html('');
        $description = $dom->filter('div.product-detail-description-text')->html('');

        return $highlights . '<br><br>' . $description;
    }

    /**
     * Returns the capabilities supported by this provider.
     *
     * @return ProviderCapabilities[]
     */
    public function getCapabilities(): array
    {
        return [
            ProviderCapabilities::BASIC,
            ProviderCapabilities::PICTURE,
            ProviderCapabilities::PRICE,
            ProviderCapabilities::DATASHEET,
        ];
    }
}
0 commit comments