Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/include/asset/ScrapedAssetCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
*/
class ScrapedAssetCollection extends ArrayObject
{

use TestForUrl;

/**
* @param array<int, ScrapedAsset> $assets
*/
Expand Down
17 changes: 1 addition & 16 deletions src/include/asset/StoredAssetCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
*/
class StoredAssetCollection extends ArrayObject
{
use TestForUrl;

/**
* @param array<mixed> $assets
Expand Down Expand Up @@ -46,20 +47,4 @@ public function append(mixed $value): void
}
parent::append($value);
}

/**
* Tests whether any of the {@link StoredAsset}s in this collection have the given URL.
* Ignoring capitalization.
* @param string $url
* @return bool
*/
public function containsUrl(string $url): bool
{
foreach ($this as $asset) {
if (strtolower($asset->url) == strtolower($url)) {
return true;
}
}
return false;
}
}
23 changes: 23 additions & 0 deletions src/include/asset/TestForUrl.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

namespace asset;

trait TestForUrl
{
    /**
     * Tests whether any asset in this collection has the given URL, ignoring capitalization.
     *
     * The using class must be iterable and yield elements that expose a string `url` property.
     *
     * @param string $url The URL to look for.
     * @return bool true if at least one element's URL matches, false otherwise
     *              (including when the collection is empty).
     */
    public function containsUrl(string $url): bool
    {
        // Lower-case the needle once instead of on every iteration.
        $needle = strtolower($url);

        foreach ($this as $asset) {
            // Strict comparison: with `==`, numeric-looking strings such as
            // "1e3" and "1000" would be compared as numbers and treated as equal.
            if (strtolower($asset->url) === $needle) {
                return true;
            }
        }

        return false;
    }
}
18 changes: 13 additions & 5 deletions src/include/creator/Creator.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
use creator\logic\CreatorLogicLocationTextures;
use creator\logic\CreatorLogicTwinbru;
use creator\logic\CreatorLogicLightbeans;
use creator\logic\CreatorLogicCgTrader;
use database\Database;
use DateTime;
use Exception;
Expand All @@ -48,6 +49,7 @@ enum Creator: int
case PBR_PX = 20;
case TWINBRU = 21;
case LIGHTBEANS = 22;
case CGTRADER = 23;

public static function fromAny(mixed $value): ?self
{
Expand Down Expand Up @@ -79,7 +81,8 @@ public function slug(): string
self::LOCATION_TEXTURES => 'location-textures',
self::PBR_PX => "pbr-px",
self::TWINBRU => "twinbru",
self::LIGHTBEANS => "lightbeans"
self::LIGHTBEANS => "lightbeans",
self::CGTRADER => "cgtrader"
};
}

Expand All @@ -102,7 +105,8 @@ public function title(): string
self::LOCATION_TEXTURES => 'Location Textures',
self::PBR_PX => 'PBRPX',
self::TWINBRU => 'Twinbru',
self::LIGHTBEANS => 'Lightbeans'
self::LIGHTBEANS => 'Lightbeans',
self::CGTRADER => 'CGTrader (Free Section)'
};
}

Expand All @@ -125,7 +129,8 @@ public function description(): string
self::LOCATION_TEXTURES => 'Locationtextures.com is an online platform providing high quality royalty-free photo reference packs for games and film industry. We offer free packs and every pack comes with free samples.',
self::PBR_PX => 'We are a small team from China, passionate about CG production. Through PBRPX, we provide artists with completely free, unrestricted digital assets, allowing them to unleash their creativity.',
self::TWINBRU => 'Browse our library of more than 13 000 digital fabric twins to download 3D fabric textures or order physical fabric samples.',
self::LIGHTBEANS => 'We Connect Manufacturers with Architects and Designers - Thousands of digitized products for your projects.'
self::LIGHTBEANS => 'We Connect Manufacturers with Architects and Designers - Thousands of digitized products for your projects.',
self::CGTRADER => 'CGTrader is a marketplace for 3D models, featuring professional and user-generated content for games, VR, AR, and visualization. Currently, only the "Free" section is indexed.'
};
}

Expand Down Expand Up @@ -172,7 +177,8 @@ public function baseUrl(): string
self::LOCATION_TEXTURES => 'https://locationtextures.com/panoramas/free-panoramas/',
self::PBR_PX => 'https://library.pbrpx.com/',
self::TWINBRU => 'https://textures.twinbru.com',
self::LIGHTBEANS => 'https://lightbeans.com'
self::LIGHTBEANS => 'https://lightbeans.com',
self::CGTRADER => 'https://www.cgtrader.com/'
};
}

Expand Down Expand Up @@ -200,7 +206,8 @@ public static function randomScrapingTarget(bool $considerAvailability): self
self::LOCATION_TEXTURES,
self::PBR_PX,
self::TWINBRU,
self::LIGHTBEANS
self::LIGHTBEANS,
self::CGTRADER
];

do {
Expand Down Expand Up @@ -323,6 +330,7 @@ public function getLogic(): CreatorLogic
self::PBR_PX => new CreatorLogicPbrPx(),
self::TWINBRU => new CreatorLogicTwinbru(),
self::LIGHTBEANS => new CreatorLogicLightbeans(),
self::CGTRADER => new CreatorLogicCgTrader(),
// This becomes relevant when new creators are added without logic
//default => throw new InvalidArgumentException("No logic defined for creator " . $this->title()),
};
Expand Down
133 changes: 133 additions & 0 deletions src/include/creator/logic/CreatorLogicCgTrader.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
<?php

namespace creator\logic;

use asset\AssetType;
use asset\ScrapedAsset;
use asset\ScrapedAssetCollection;
use asset\ScrapedAssetStatus;
use asset\StoredAssetCollection;
use creator\Creator;
use creator\CreatorLogic;
use DateTime;
use Exception;
use fetch\WebItemReference;
use log\Log;
use log\LogLevel;
use Rct567\DomQuery\DomQuery;

/**
 * Scraping logic for CGTrader.
 *
 * Walks the paginated listing behind {@see CreatorLogicCgTrader::$indexingBaseUrl},
 * opens the product page of every model that is not yet known and turns it into a
 * {@see ScrapedAsset}. The page to continue from is persisted in the creator state
 * under the key "page" so that consecutive runs work their way through the listing.
 */
class CreatorLogicCgTrader extends CreatorLogic
{
    protected Creator $creator = Creator::CGTRADER;

    /** Listing URL (free models, 10 per page); the page number is appended to it. */
    private string $indexingBaseUrl = "https://www.cgtrader.com/3d-models?free=1&licenses%5B%5D=cgt_standard_only&licenses%5B%5D=exclude_3d_print&licenses%5B%5D=exclude_editorial&licenses%5B%5D=exclude_adult_content&per_page=10&page=";

    /** Upper bound for the number of new assets collected in a single run. */
    protected int $maxAssetsPerRun = 20;

    /**
     * Collects new assets from the CGTrader listing.
     *
     * @param StoredAssetCollection $existingAssets Assets that are already known; their URLs are skipped.
     * @return ScrapedAssetCollection The newly found assets (at most $maxAssetsPerRun).
     * @throws Exception If a listing page can not be parsed into a DOM query.
     */
    public function scrapeAssets(StoredAssetCollection $existingAssets): ScrapedAssetCollection
    {
        $tmpCollection = new ScrapedAssetCollection();
        $page = $this->getCreatorState("page") ?? 1;

        do {
            $response = (new WebItemReference($this->indexingBaseUrl . $page))->fetch();

            if ($response->httpStatusCode != 200) {
                // We have reached the end (or an error)
                // Start from the beginning next time
                $page = 1;
                $response = (new WebItemReference($this->indexingBaseUrl . $page))->fetch();
            }

            $domQuery = $response->parseAsDomQuery();

            if ($domQuery === null) {
                throw new Exception("Error while trying to build DOM query.");
            }

            // Find all product cards on the page
            $modelCards = $domQuery->find('.card-3d-model');
            $cardsFoundThisIteration = count($modelCards);

            foreach ($modelCards as $modelCard) {
                if (count($tmpCollection) >= $this->maxAssetsPerRun) {
                    // Break out of both loops. $page is intentionally not advanced,
                    // so the next run re-visits this page and picks up the remaining cards.
                    break 2;
                }

                // Find the URL of the asset
                $url = $modelCard->find('a.cgt-model-card__link')->attr('href');

                if (!$url) {
                    Log::write("Skipping model card with unresolvable URL", $this->indexingBaseUrl . $page, LogLevel::WARNING);
                    continue;
                }

                // Skip everything that is already stored or was found earlier in this run
                if ($existingAssets->containsUrl($url) || $tmpCollection->containsUrl($url)) {
                    continue;
                }

                // Get the full product page.
                // The parentheses around `new` keep this valid on PHP versions before 8.4.
                $modelPage = (new WebItemReference($url))->fetch()->parseAsDomQuery();

                if ($modelPage === null) {
                    // parseAsDomQuery() is nullable; without this guard the next call would be a fatal error.
                    Log::write("Skipping asset with unparsable product page", $url, LogLevel::WARNING);
                    continue;
                }

                // Skip works which are marked as AI-generated.
                // This won't be 100% effective because it relies on proper moderation from CGTrader
                // but no better method was found at this point.
                $aiGeneratedTag = $modelPage->find('.pricing-area-wrapper *[data-for="tooltip-ai-generated"]');
                if (count($aiGeneratedTag) > 0) {
                    Log::write("Skipped AI generated model", $url, LogLevel::INFO);
                    continue;
                }

                // Get the name
                $name = trim($modelPage->find('.pricing-area__title')->text());

                if (empty($name)) {
                    Log::write("Skipping asset with empty name", $url, LogLevel::WARNING);
                    continue;
                }

                // Get thumbnail
                $thumbnailUrl = $modelPage->find('.gallery-area')->find('img')->attr('src');

                if (!$thumbnailUrl) {
                    Log::write("Skipping asset with unresolvable thumbnail", $url, LogLevel::WARNING);
                    continue;
                }

                // Extract tags from the name and the website
                $tags = preg_split('/[\s\-_]+/', $name) ?: [];

                foreach ($modelPage->find('.description-area__related-tags') as $tagLink) {
                    // Markup text usually carries surrounding whitespace/newlines
                    $tags[] = trim($tagLink->text());
                }

                // Drop empty / single-character fragments and duplicates
                $tags = array_filter($tags, fn($tag) => strlen($tag) > 1);
                $tags = array_unique($tags);

                $tmpCollection[] = new ScrapedAsset(
                    id: null,
                    creatorGivenId: null,
                    title: $name,
                    url: $url,
                    date: new DateTime(),
                    tags: array_values($tags),
                    type: AssetType::MODEL_3D,
                    creator: Creator::CGTRADER,
                    status: ScrapedAssetStatus::NEWLY_FOUND,
                    rawThumbnailData: (new WebItemReference(url: $thumbnailUrl))->fetch()->content
                );
            }

            $page += 1;
        } while ($cardsFoundThisIteration > 0 && $page < 1000 /* Failsafe */);

        if ($cardsFoundThisIteration === 0 || $page >= 1000) {
            // The listing ran dry with a regular 200 response (or the failsafe tripped).
            // Wrap around, otherwise the stored page number would keep growing past the
            // end of the listing and no later run would ever find anything again.
            $page = 1;
        }

        $this->setCreatorState("page", $page);

        return $tmpCollection;
    }
}
2 changes: 1 addition & 1 deletion src/include/creator/logic/CreatorLogicPoliigon.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public function scrapeAssets(StoredAssetCollection $existingAssets): ScrapedAsse
$url = $this->baseUrl . $urlPath;

// Check if already exists
if (!$this->isInExistingAssets($url, $existingAssets)) {
if (!$existingAssets->containsUrl($url)) {

// Get the name
$name = $assetBox->find('.asset-box__item-title-name')->text();
Expand Down
11 changes: 11 additions & 0 deletions src/include/fetch/FetchedWebItem.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use DOMDocument;
use DOMNodeList;
use DOMXPath;
use Rct567\DomQuery\DomQuery;
use SimpleXMLElement;

class FetchedWebItem
Expand Down Expand Up @@ -95,6 +96,16 @@ public function parseAsDomDocument(): ?DOMDocument
return $document;
}

/**
 * Wraps the result of {@see parseAsDomDocument()} in a DomQuery instance.
 *
 * @return DomQuery|null The query object, or null when no DOM document could be produced.
 */
public function parseAsDomQuery(): ?DomQuery
{
    $document = $this->parseAsDomDocument();

    return $document === null ? null : new DomQuery($document);
}

/**
*
* @return null|array<string,string>
Expand Down
Binary file added src/public/static/creator/23.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading