Skip to content

Commit d4c5a29

Browse files
authored
Merge pull request #91 from govigilant/develop
2025.12.2
2 parents a5017db + 1a82b72 commit d4c5a29

File tree

14 files changed

+136
-51
lines changed

14 files changed

+136
-51
lines changed

packages/crawler/src/Actions/CrawlUrl.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44

55
use Illuminate\Http\Client\ConnectionException;
66
use Illuminate\Http\Client\Response;
7+
use Illuminate\Support\Arr;
78
use Illuminate\Support\Facades\Gate;
89
use Illuminate\Support\Facades\Http;
910
use Illuminate\Support\Str;
1011
use Vigilant\Core\Services\TeamService;
1112
use Vigilant\Crawler\Enums\State;
1213
use Vigilant\Crawler\Models\CrawledUrl;
14+
use Vigilant\Crawler\Models\IgnoredUrl;
1315
use Vigilant\Crawler\Notifications\RatelimitedNotification;
1416

1517
class CrawlUrl
@@ -105,9 +107,28 @@ public function crawl(CrawledUrl $url, int $try = 0): void
105107
];
106108
}
107109

110+
$existingLinks = CrawledUrl::query()
111+
->where('crawler_id', '=', $url->crawler_id)
112+
->whereIn('url_hash', Arr::pluck($queuedLinks, 'url_hash'))
113+
->pluck('url_hash')
114+
->all();
115+
116+
$queuedLinks = array_filter($queuedLinks, function (array $record) use ($existingLinks): bool {
117+
return ! in_array($record['url_hash'], $existingLinks, true);
118+
});
119+
108120
if ($queuedLinks !== []) {
109121
$timestamp = now();
110122
$records = [];
123+
$ignoredHashes = [];
124+
125+
if ($url->crawler_id !== null) {
126+
$ignoredHashes = IgnoredUrl::query()
127+
->where('crawler_id', '=', $url->crawler_id)
128+
->whereIn('url_hash', array_keys($queuedLinks))
129+
->pluck('url_hash')
130+
->all();
131+
}
111132

112133
foreach ($queuedLinks as $record) {
113134
$records[] = [
@@ -117,6 +138,7 @@ public function crawl(CrawledUrl $url, int $try = 0): void
117138
'url_hash' => $record['url_hash'],
118139
'url' => $record['url'],
119140
'found_on_id' => $record['found_on_id'],
141+
'ignored' => in_array($record['url_hash'], $ignoredHashes, true),
120142
'crawled' => false,
121143
'created_at' => $timestamp,
122144
'updated_at' => $timestamp,

packages/crawler/src/Actions/ImportSitemaps.php

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use Mtownsend\XmlToArray\XmlToArray;
88
use Vigilant\Crawler\Models\CrawledUrl;
99
use Vigilant\Crawler\Models\Crawler;
10+
use Vigilant\Crawler\Models\IgnoredUrl;
1011

1112
class ImportSitemaps
1213
{
@@ -73,16 +74,31 @@ protected function storeUrls(Crawler $crawler, Collection $urls): void
7374
continue;
7475
}
7576

77+
$timestamp = now();
78+
$urlHashes = $newUrls->map(fn ($url): string => md5($url));
79+
$ignoredHashes = $urlHashes->isEmpty()
80+
? []
81+
: IgnoredUrl::query()
82+
->where('crawler_id', '=', $crawler->id)
83+
->whereIn('url_hash', $urlHashes->all())
84+
->pluck('url_hash')
85+
->all();
86+
7687
$crawler->urls()->insert(
77-
$newUrls->map(fn ($url): array => [
78-
'uuid' => (new CrawledUrl)->newUniqueId(),
79-
'crawler_id' => $crawler->id,
80-
'team_id' => $crawler->team_id,
81-
'url' => $url,
82-
'url_hash' => md5($url),
83-
'created_at' => now(),
84-
'updated_at' => now(),
85-
])->toArray()
88+
$newUrls->map(function ($url) use ($crawler, $timestamp, $ignoredHashes): array {
89+
$hash = md5($url);
90+
91+
return [
92+
'uuid' => (new CrawledUrl)->newUniqueId(),
93+
'crawler_id' => $crawler->id,
94+
'team_id' => $crawler->team_id,
95+
'url' => $url,
96+
'url_hash' => $hash,
97+
'ignored' => in_array($hash, $ignoredHashes, true),
98+
'created_at' => $timestamp,
99+
'updated_at' => $timestamp,
100+
];
101+
})->toArray()
86102
);
87103
}
88104
}
packages/crawler/src/Jobs/StartCrawlerJob.php

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
namespace Vigilant\Crawler\Jobs;
4+
5+
use Illuminate\Bus\Queueable;
6+
use Illuminate\Contracts\Queue\ShouldBeUnique;
7+
use Illuminate\Contracts\Queue\ShouldQueue;
8+
use Illuminate\Foundation\Bus\Dispatchable;
9+
use Illuminate\Queue\InteractsWithQueue;
10+
use Illuminate\Queue\SerializesModels;
11+
use Vigilant\Core\Services\TeamService;
12+
use Vigilant\Crawler\Actions\StartCrawler;
13+
use Vigilant\Crawler\Models\Crawler;
14+
15+
/**
 * Job that starts a crawler through the StartCrawler action.
 *
 * The job implements ShouldBeUnique; its unique id is the crawler's id
 * (see uniqueId()).
 */
class StartCrawlerJob implements ShouldBeUnique, ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * @param  Crawler  $crawler  The crawler this job will start.
     */
    public function __construct(public Crawler $crawler)
    {
        // The target queue is read from the `crawler.queue` config value.
        $queueName = config('crawler.queue');

        $this->onQueue($queueName);
    }

    /**
     * Set the team from the crawler's team_id, then pass the crawler to
     * the StartCrawler action.
     */
    public function handle(StartCrawler $starter, TeamService $teamService): void
    {
        $crawler = $this->crawler;

        $teamService->setTeamById($crawler->team_id);
        $starter->start($crawler);
    }

    /**
     * Unique id for this job: the id of the crawler it was created with.
     */
    public function uniqueId(): int
    {
        return $this->crawler->id;
    }
}

packages/crawler/src/Livewire/Crawler/Dashboard.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public function render(): mixed
3030

3131
return view($view, [
3232
'total_url_count' => $crawler->totalUrlCount(),
33-
'issue_count' => $crawler->issueCount(),
33+
'issue_count' => $crawler->issueCount() ?? 0,
3434
'ignored_count' => $crawler->urls()->where('ignored', '=', true)->count(),
3535
'nextRun' => $crawler->enabled ? $nextRun->diffForHumans() : __('Crawler disabled'),
3636
]);

packages/crawler/src/Observers/CrawlerObserver.php

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,13 @@
22

33
namespace Vigilant\Crawler\Observers;
44

5-
use Vigilant\Crawler\Actions\StartCrawler;
5+
use Vigilant\Crawler\Jobs\StartCrawlerJob;
66
use Vigilant\Crawler\Models\Crawler;
77

88
class CrawlerObserver
99
{
10-
public function __construct(
11-
protected StartCrawler $startCrawler,
12-
) {
13-
}
14-
1510
public function created(Crawler $crawler): void
1611
{
17-
$this->startCrawler->start($crawler);
12+
StartCrawlerJob::dispatch($crawler);
1813
}
1914
}

packages/crawler/tests/Actions/ProcessCrawlerStateTest.php

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,39 +3,15 @@
33
namespace Vigilant\Crawler\Tests\Actions;
44

55
use Illuminate\Support\Facades\Event;
6-
use Mockery\MockInterface;
76
use PHPUnit\Framework\Attributes\Test;
87
use Vigilant\Crawler\Actions\ProcessCrawlerState;
9-
use Vigilant\Crawler\Actions\StartCrawler;
108
use Vigilant\Crawler\Enums\State;
119
use Vigilant\Crawler\Events\CrawlerFinishedEvent;
1210
use Vigilant\Crawler\Models\Crawler;
1311
use Vigilant\Crawler\Tests\TestCase;
1412

1513
class ProcessCrawlerStateTest extends TestCase
1614
{
17-
#[Test]
18-
public function it_processes_pending_state(): void
19-
{
20-
/** @var Crawler $crawler */
21-
$crawler = Crawler::query()->create([
22-
'start_url' => 'vigilant',
23-
'state' => State::Pending,
24-
'schedule' => '0 0 * * *',
25-
]);
26-
27-
$this->mock(StartCrawler::class, function (MockInterface $mock): void {
28-
$mock->shouldReceive('start')->andReturn()->once();
29-
});
30-
31-
$crawler->forceFill(['state' => State::Pending])->save();
32-
$crawler->refresh();
33-
34-
/** @var ProcessCrawlerState $action */
35-
$action = app(ProcessCrawlerState::class);
36-
$action->process($crawler);
37-
}
38-
3915
#[Test]
4016
public function it_processes_crawling_finished_state(): void
4117
{

packages/healthchecks/src/Observers/HealthcheckObserver.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Vigilant\Healthchecks\Observers;
44

55
use Illuminate\Support\Str;
6+
use Vigilant\Healthchecks\Jobs\CheckHealthcheckJob;
67
use Vigilant\Healthchecks\Models\Healthcheck;
78

89
class HealthcheckObserver
@@ -13,4 +14,9 @@ public function creating(Healthcheck $healthcheck): void
1314
$healthcheck->token = Str::random(32);
1415
}
1516
}
17+
18+
/**
 * Dispatches a CheckHealthcheckJob for the given healthcheck.
 *
 * NOTE(review): presumably invoked as the Eloquent "created" observer hook,
 * going by the class and method naming — confirm the observer registration.
 */
public function created(Healthcheck $healthcheck): void
19+
{
20+
CheckHealthcheckJob::dispatch($healthcheck);
21+
}
1622
}

packages/healthchecks/tests/Actions/CheckMetricTest.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Vigilant\Healthchecks\Tests\Actions;
44

55
use Illuminate\Support\Carbon;
6+
use Illuminate\Support\Facades\Bus;
67
use PHPUnit\Framework\Attributes\Test;
78
use Vigilant\Healthchecks\Actions\CheckMetric;
89
use Vigilant\Healthchecks\Enums\Type;
@@ -22,6 +23,8 @@ public function it_does_nothing_when_no_metrics_exist(): void
2223
MetricIncreasingNotification::fake();
2324
MetricSpikeNotification::fake();
2425

26+
Bus::fake();
27+
2528
$healthcheck = Healthcheck::query()->create([
2629
'domain' => 'example.com',
2730
'type' => Type::Laravel,

packages/lighthouse/src/Models/LighthouseMonitor.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
use Vigilant\Core\Scopes\TeamScope;
1313
use Vigilant\Sites\Models\Site;
1414
use Vigilant\Users\Observers\TeamObserver;
15+
use Vigilant\Lighthouse\Observers\LighthouseMonitorObserver;
1516

1617
/**
1718
* @property int $id
@@ -29,7 +30,7 @@
2930
* @property ?Site $site
3031
* @property Collection<int, LighthouseResult> $lighthouseResults
3132
*/
32-
#[ObservedBy([TeamObserver::class])]
33+
#[ObservedBy([TeamObserver::class, LighthouseMonitorObserver::class])]
3334
#[ScopedBy([TeamScope::class])]
3435
class LighthouseMonitor extends Model
3536
{
packages/lighthouse/src/Observers/LighthouseMonitorObserver.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
namespace Vigilant\Lighthouse\Observers;
4+
5+
use Vigilant\Lighthouse\Jobs\RunLighthouseJob;
6+
use Vigilant\Lighthouse\Models\LighthouseMonitor;
7+
8+
/**
 * Observer for LighthouseMonitor models.
 */
class LighthouseMonitorObserver
{
    /**
     * Dispatches a RunLighthouseJob for the given monitor.
     *
     * @param  LighthouseMonitor  $monitor  The monitor passed to the job.
     */
    public function created(LighthouseMonitor $monitor): void
    {
        RunLighthouseJob::dispatch($monitor);
    }
}

0 commit comments

Comments
 (0)