1+ <?php
2+ /*
3+ * This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
4+ *
5+ * Copyright (C) 2019 - 2025 Jan Böhmer (https://github.com/jbtronics)
6+ *
7+ * This program is free software: you can redistribute it and/or modify
8+ * it under the terms of the GNU Affero General Public License as published
9+ * by the Free Software Foundation, either version 3 of the License, or
10+ * (at your option) any later version.
11+ *
12+ * This program is distributed in the hope that it will be useful,
13+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+ * GNU Affero General Public License for more details.
16+ *
17+ * You should have received a copy of the GNU Affero General Public License
18+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
19+ */
20+
21+ declare (strict_types=1 );
22+
23+
24+ namespace App \Services \InfoProviderSystem \Providers ;
25+
26+ use App \Entity \Parts \ManufacturingStatus ;
27+ use App \Entity \Parts \Part ;
28+ use App \Services \InfoProviderSystem \DTOs \FileDTO ;
29+ use App \Services \InfoProviderSystem \DTOs \ParameterDTO ;
30+ use App \Services \InfoProviderSystem \DTOs \PartDetailDTO ;
31+ use App \Services \InfoProviderSystem \DTOs \PriceDTO ;
32+ use App \Services \InfoProviderSystem \DTOs \PurchaseInfoDTO ;
33+ use App \Services \InfoProviderSystem \DTOs \SearchResultDTO ;
34+ use Symfony \Component \DependencyInjection \Attribute \Autowire ;
35+ use Symfony \Component \DomCrawler \Crawler ;
36+ use Symfony \Contracts \HttpClient \HttpClientInterface ;
37+
/**
 * Info provider that retrieves part information by scraping the HTML pages of the pollin.de web shop.
 *
 * Keyword searches are answered from the product boxes of the shop's search result page; part details
 * are parsed from the product page that the shop returns when searching for a numeric order number.
 */
class PollinProvider implements InfoProviderInterface
{
    /** Search endpoint of the shop, used for keyword searches and for looking up a product by its id. */
    private const SEARCH_URL = 'https://www.pollin.de/search';

    public function __construct(
        private readonly HttpClientInterface $client,
        #[Autowire(env: 'bool:PROVIDER_POLLIN_ENABLED')]
        private readonly bool $enabled = true,
    ) {
    }

    public function getProviderInfo(): array
    {
        return [
            'name' => 'Pollin',
            'description' => 'Webscrapping from pollin.de to get part information',
            'url' => 'https://www.pollin.de/',
            'disabled_help' => 'Set PROVIDER_POLLIN_ENABLED env to 1',
        ];
    }

    public function getProviderKey(): string
    {
        return 'pollin';
    }

    public function isActive(): bool
    {
        return $this->enabled;
    }

    /**
     * Searches the shop for the given keyword and converts every product box of the result page into a DTO.
     *
     * @return SearchResultDTO[]
     */
    public function searchByKeyword(string $keyword): array
    {
        $response = $this->client->request('GET', self::SEARCH_URL, [
            'query' => [
                'search' => $keyword,
            ],
        ]);

        $dom = new Crawler($response->getContent());

        //Crawler::each() collects the return value of the callback for every div.product-box
        return $dom->filter('div.product-box')->each(function (Crawler $node): SearchResultDTO {
            $nameLink = $node->filter('a.product-name');

            return new SearchResultDTO(
                provider_key: $this->getProviderKey(),
                provider_id: $node->filter('meta[itemprop="productID"]')->attr('content'),
                name: $nameLink->text(),
                description: '',
                //Image and availability are optional; a box without them must not abort the whole search
                preview_image_url: $this->attrOrNull($node->filter('img.product-image'), 'src'),
                manufacturing_status: $this->mapAvailability(
                    $this->attrOrNull($node->filter('link[itemprop="availability"]'), 'href')
                ),
                provider_url: $nameLink->attr('href'),
            );
        });
    }

    /**
     * Maps a schema.org availability URI to a manufacturing status.
     * Unknown or missing (null) values are mapped to NOT_SET.
     */
    private function mapAvailability(?string $availabilityURI): ManufacturingStatus
    {
        return match ($availabilityURI) {
            'http://schema.org/InStock', 'https://schema.org/InStock' => ManufacturingStatus::ACTIVE,
            'http://schema.org/OutOfStock', 'https://schema.org/OutOfStock' => ManufacturingStatus::DISCONTINUED,
            default => ManufacturingStatus::NOT_SET,
        };
    }

    /**
     * Returns the given attribute of the first matched node, or null if nothing matched
     * (Crawler::attr() throws on an empty node list) or the attribute is not present.
     */
    private function attrOrNull(Crawler $nodes, string $attribute): ?string
    {
        return $nodes->count() > 0 ? $nodes->attr($attribute) : null;
    }

    /**
     * Retrieves the details of the product with the given (numeric) id.
     *
     * @throws \InvalidArgumentException if $id is not numeric
     * @throws \RuntimeException if the response was reached via a redirect
     */
    public function getDetails(string $id): PartDetailDTO
    {
        //Ensure that $id is numeric
        if (!is_numeric($id)) {
            throw new \InvalidArgumentException("The id must be numeric!");
        }

        $response = $this->client->request('GET', self::SEARCH_URL, [
            'query' => [
                'search' => $id,
            ],
        ]);

        //NOTE(review): the comment that used to stand here said the search "must have us redirected to the
        //product page", yet this check rejects every response that followed a redirect. The behaviour is
        //kept unchanged; confirm against the live shop which of the two is intended.
        if ($response->getInfo('redirect_count') > 0) {
            throw new \RuntimeException("Could not resolve the product page for the given id!");
        }

        return $this->parseProductPage($response->getContent());
    }

    /**
     * Builds the detail DTO from the HTML of a product page.
     */
    private function parseProductPage(string $content): PartDetailDTO
    {
        $dom = new Crawler($content);

        $productPageUrl = $dom->filter('meta[property="product:product_link"]')->attr('content');
        $orderId = trim($dom->filter('span[itemprop="sku"]')->text()); //Text is important here

        //Parse purchase info
        $purchaseInfo = new PurchaseInfoDTO('Pollin', $orderId, $this->parsePrices($dom), $productPageUrl);

        return new PartDetailDTO(
            provider_key: $this->getProviderKey(),
            provider_id: $orderId,
            name: trim($dom->filter('meta[property="og:title"]')->attr('content')),
            description: $dom->filter('meta[property="og:description"]')->attr('content'),
            category: $this->parseCategory($dom),
            manufacturer: $this->attrOrNull($dom->filter('meta[property="product:brand"]'), 'content'),
            preview_image_url: $this->attrOrNull($dom->filter('meta[property="og:image"]'), 'content'),
            manufacturing_status: $this->mapAvailability(
                $this->attrOrNull($dom->filter('link[itemprop="availability"]'), 'href')
            ),
            provider_url: $productPageUrl,
            notes: $this->parseNotes($dom),
            datasheets: $this->parseDatasheets($dom),
            parameters: $this->parseParameters($dom),
            vendor_infos: [$purchaseInfo],
            mass: $this->parseMass($dom),
        );
    }

    /**
     * Reads the weight meta tag (its value is treated as kilograms) and converts it to grams.
     * Returns null if the page carries no weight meta tag.
     */
    private function parseMass(Crawler $dom): ?float
    {
        $massStr = $this->attrOrNull($dom->filter('meta[itemprop="weight"]'), 'content');
        if ($massStr === null) {
            return null;
        }

        //Remove the unit, convert to float and then from kilograms to grams
        return (float) str_replace('kg', '', $massStr) * 1000;
    }

    /**
     * Collects every link inside the download section of the product page as a file DTO.
     *
     * @return FileDTO[]
     */
    private function parseDatasheets(Crawler $dom): array
    {
        //One FileDTO per a element within div.pol-product-detail-download-files
        return $dom->filter('div.pol-product-detail-download-files a')->each(
            static fn (Crawler $node): FileDTO => new FileDTO($node->attr('href'), $node->text())
        );
    }

    /**
     * Converts every row of the properties table into a parameter DTO.
     *
     * @return ParameterDTO[]
     */
    private function parseParameters(Crawler $dom): array
    {
        //One parameter per tr.properties-row inside table.product-detail-properties-table
        return $dom->filter('table.product-detail-properties-table tr.properties-row')->each(
            static fn (Crawler $node) => ParameterDTO::parseValueField(
                //The label cell ends with a colon, which is not part of the parameter name
                name: rtrim($node->filter('th.properties-label')->text(), ':'),
                value: $node->filter('td.properties-value')->text()
            )
        );
    }

    /**
     * Builds the category path ("A -> B -> C") from the breadcrumb of the product page.
     */
    private function parseCategory(Crawler $dom): string
    {
        //Text of each li.breadcrumb-item inside ol.breadcrumb; the home entry yields null and is dropped below
        $crumbs = $dom->filter('ol.breadcrumb li.breadcrumb-item')->each(
            static fn (Crawler $node): ?string => str_contains((string) $node->attr('class'), 'breadcrumb-item-home')
                ? null
                : $node->text()
        );

        return implode(' -> ', array_filter($crumbs, static fn (?string $crumb): bool => $crumb !== null));
    }

    /**
     * Concatenates the HTML of the product highlights and of the product description.
     * A section that is missing on the page is skipped instead of raising an exception.
     */
    private function parseNotes(Crawler $dom): string
    {
        $sections = [];
        foreach (['div.product-detail-top-features', 'div.product-detail-description-text'] as $selector) {
            $nodes = $dom->filter($selector);
            if ($nodes->count() > 0) {
                $sections[] = $nodes->html();
            }
        }

        return implode('<br><br>', $sections);
    }

    /**
     * Extracts the price for a single piece.
     *
     * @return PriceDTO[]
     */
    private function parsePrices(Crawler $dom): array
    {
        //TODO: Properly handle multiple prices, for now we just look at the price for one piece

        //We assume the currency is always the same
        $currency = $dom->filter('meta[property="product:price:currency"]')->attr('content');

        //If there is a meta[itemprop=highPrice] then use this as the price, otherwise the plain amount
        $price = $this->attrOrNull($dom->filter('meta[itemprop="highPrice"]'), 'content')
            ?? $dom->filter('meta[property="product:price:amount"]')->attr('content');

        return [
            new PriceDTO(1.0, $price, $currency),
        ];
    }

    public function getCapabilities(): array
    {
        return [
            ProviderCapabilities::BASIC,
            ProviderCapabilities::PICTURE,
            ProviderCapabilities::PRICE,
            ProviderCapabilities::DATASHEET,
        ];
    }
}
0 commit comments