Skip to content

Commit 2fdd837

Browse files
committed
Started working on an aliexpress provider
1 parent e94d4a7 commit 2fdd837

File tree

1 file changed

+151
-0
lines changed

1 file changed

+151
-0
lines changed
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
<?php
2+
/*
3+
* This file is part of Part-DB (https://github.com/Part-DB/Part-DB-symfony).
4+
*
5+
* Copyright (C) 2019 - 2025 Jan Böhmer (https://github.com/jbtronics)
6+
*
7+
* This program is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU Affero General Public License as published
9+
* by the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU Affero General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU Affero General Public License
18+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
19+
*/
20+
21+
declare(strict_types=1);
22+
23+
24+
namespace App\Services\InfoProviderSystem\Providers;
25+
26+
use App\Services\InfoProviderSystem\DTOs\PartDetailDTO;
27+
use App\Services\InfoProviderSystem\DTOs\SearchResultDTO;
28+
use Symfony\Component\DomCrawler\Crawler;
29+
use Symfony\Contracts\HttpClient\HttpClientInterface;
30+
31+
/**
 * Info provider that scrapes part information from the AliExpress web shop.
 *
 * Search results and part details are extracted from the HTML returned by the shop
 * (see getBaseURL()), so all selectors used here depend on the shop's current markup.
 */
class AliexpressProvider implements InfoProviderInterface
{
    /**
     * @param HttpClientInterface $client HTTP client used for every request sent to the shop
     */
    public function __construct(private readonly HttpClientInterface $client)
    {
    }

    /**
     * Returns the static metadata describing this provider.
     *
     * @return array<string, string>
     */
    public function getProviderInfo(): array
    {
        return [
            'name' => 'Aliexpress',
            'description' => 'Webscraping from aliexpress.com to get part information',
            'url' => 'https://aliexpress.com/',
            //isActive() is unconditional so far, so there is no env variable the user could be pointed to
            'disabled_help' => 'This provider is always active and does not need to be enabled',
        ];
    }

    /**
     * Returns the unique key identifying this provider.
     */
    public function getProviderKey(): string
    {
        return "aliexpress";
    }

    /**
     * Whether this provider can be used. Currently always true.
     */
    public function isActive(): bool
    {
        return true;
    }

    /**
     * Returns the base URL of the shop all requests are sent to.
     */
    public function getBaseURL(): string
    {
        //Without the trailing slash
        return 'https://de.aliexpress.com';
    }

    /**
     * Searches the shop for the given keyword and returns the results found on the returned page.
     *
     * Result cards that lack a link, a numeric product ID or a title are skipped, so that a single
     * malformed card does not abort the whole search.
     *
     * @param string $keyword The search term
     * @return SearchResultDTO[]
     */
    public function searchByKeyword(string $keyword): array
    {
        $response = $this->client->request('GET', $this->getBaseURL() . '/wholesale', [
            'query' => [
                'SearchText' => $keyword,
                'CatId' => 0,
                'd' => 'y',
            ],
        ]);

        $dom = new Crawler($response->getContent());

        //each() collects the closure's return values; cards that could not be parsed yield null
        $parsed = $dom->filter('div.search-item-card-wrapper-gallery')->each(
            fn (Crawler $node): ?SearchResultDTO => $this->parseSearchResult($node)
        );

        return array_values(array_filter(
            $parsed,
            static fn (?SearchResultDTO $result): bool => $result !== null
        ));
    }

    /**
     * Converts a single search result card into a DTO.
     *
     * @param Crawler $node The card element (div.search-item-card-wrapper-gallery)
     * @return SearchResultDTO|null Null if the card has no usable link, product ID or title
     */
    private function parseSearchResult(Crawler $node): ?SearchResultDTO
    {
        //attr() throws on an empty node list, so the node count has to be checked first
        $link = $node->filter('a');
        $href = $link->count() > 0 ? $link->first()->attr('href') : null;
        if ($href === null || $href === '') {
            return null;
        }

        $productURL = $this->cleanProductURL($href);
        $productID = $this->extractProductID($productURL);

        //Skip results where we cannot extract a product ID
        if ($productID === null) {
            return null;
        }

        $titleNode = $node->filter('div[title]');
        $title = $titleNode->count() > 0 ? $titleNode->attr('title') : null;
        if ($title === null || $title === '') {
            return null;
        }

        //The preview image is optional
        $imageNode = $node->filter('img');
        $imageSrc = $imageNode->count() > 0 ? $imageNode->first()->attr('src') : null;

        return new SearchResultDTO(
            provider_key: $this->getProviderKey(),
            provider_id: $productID,
            name: $title,
            description: "",
            preview_image_url: ($imageSrc === null || $imageSrc === '') ? null : $this->makeAbsoluteURL($imageSrc),
            provider_url: $productURL,
        );
    }

    /**
     * Turns a scheme-relative ("//host/path") or root-relative ("/path") URL into an absolute one.
     * Any other URL is returned unchanged.
     */
    private function makeAbsoluteURL(string $url): string
    {
        if (str_starts_with($url, '//')) {
            return 'https:' . $url;
        }

        if (str_starts_with($url, '/')) {
            return $this->getBaseURL() . $url;
        }

        return $url;
    }

    /**
     * Normalizes a product link taken from a result page: removes the query string
     * and makes the URL absolute.
     */
    private function cleanProductURL(string $url): string
    {
        //Strip the query string
        return $this->makeAbsoluteURL(explode('?', $url)[0]);
    }

    /**
     * Extracts the numeric product ID from a product URL.
     *
     * @return string|null The digits directly in front of ".html", or null if the URL has no such part
     */
    private function extractProductID(string $url): ?string
    {
        //We want the numeric id from the url before the .html
        $matches = [];
        preg_match('/\/(\d+)\.html/', $url, $matches);

        return $matches[1] ?? null;
    }

    /**
     * Fetches the product page for the given ID and extracts the part details from it.
     *
     * @param string $id The product ID, as returned by searchByKeyword(); must consist of digits only
     * @throws \InvalidArgumentException If $id is not a digit-only string, or if the product title
     *                                   cannot be found in the page (thrown by the Crawler)
     */
    public function getDetails(string $id): PartDetailDTO
    {
        //Only plain digit sequences are accepted, as the ID is put into the request URL.
        //is_numeric() would also let values like "1e5", "-1", "1.5" or " 12" through.
        if (preg_match('/^\d+$/D', $id) !== 1) {
            throw new \InvalidArgumentException("The id must be numeric");
        }

        $product_page = $this->getBaseURL() . "/item/{$id}.html";
        $response = $this->client->request('GET', $product_page);

        $dom = new Crawler($response->getContent());

        //The description block is optional: html() would throw on an empty node list
        $description = $dom->filter('div[data-pl="product-description"]');

        return new PartDetailDTO(
            provider_key: $this->getProviderKey(),
            provider_id: $id,
            name: $dom->filter('h1[data-pl="product-title"]')->text(),
            description: "",
            provider_url: $product_page,
            notes: $description->count() > 0 ? $description->html() : null,
        );
    }

    /**
     * Returns the capabilities this provider announces.
     *
     * NOTE(review): getDetails() does not pass any picture or price information to the DTO yet,
     * so PICTURE and PRICE are announced but not delivered - confirm whether this is intended.
     *
     * @return ProviderCapabilities[]
     */
    public function getCapabilities(): array
    {
        return [
            ProviderCapabilities::BASIC,
            ProviderCapabilities::PICTURE,
            ProviderCapabilities::PRICE,
        ];
    }
}

0 commit comments

Comments
 (0)