-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.php
More file actions
executable file
·63 lines (56 loc) · 1.95 KB
/
test.php
File metadata and controls
executable file
·63 lines (56 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env -S php -dextension=modules/html_inspector.so
<?php
/**
 * Print the resolved target of every selected <a> element, one per line.
 *
 * The resolution base is the href of the first <base> element found under
 * html > head, itself resolved against $document_uri; when no such element
 * exists (or resolution yields null) $document_uri is used directly.
 *
 * Anchors are selected with name('a')->attribute_starts_with('href', '#')->not(),
 * which presumably excludes same-document fragment links such as "#top" --
 * confirm against the HtmlInspector selector documentation.
 *
 * @param string $html_utf8    Markup handed to HtmlInspector\HtmlDocument
 *                             (UTF-8, going by the parameter name).
 * @param string $document_uri URI the document was loaded from; fallback base.
 */
function extract_anchors(string $html_utf8, string $document_uri): void
{
    $doc = new HtmlInspector\HtmlDocument($html_utf8);

    // Look for a <base> element that is a child of html > head.
    $base_node = $doc->select(0)
        ->child()->name('html')
        ->child()->name('head')
        ->child()->name('base')
        ->iterate();

    // iterate() reports "no (more) matches" with -1 (the loop below relies on
    // the same sentinel), so only query the attribute when a node was found
    // instead of handing an invalid handle to get_attribute().
    $base = null;
    if ($base_node !== -1) {
        $base = HtmlInspector\resolve_iri($doc->get_attribute($base_node, 'href'), $document_uri);
    }
    $base ??= $document_uri;

    $selector = $doc->select(0)->descendant()->name('a')->attribute_starts_with('href', '#')->not();
    while (($node_a = $selector->iterate()) !== -1) {
        $href = $doc->get_attribute($node_a, 'href');
        $uri = HtmlInspector\resolve_iri($href, $base);
        print("$uri\n");
    }
}
/**
 * Benchmark HtmlInspector: parse ./test-html.html 200 times and, on every
 * pass, read the href attribute of each <a> element.
 *
 * Prints "<passes per second> / second (<average summed href length per pass>)".
 * The summed length is accumulated so the attribute reads have an observable
 * result; it also allows a sanity comparison with the libxml2 benchmark.
 */
function benchmark_HtmlInspector(): void
{
    $html = file_get_contents('./test-html.html');
    $time = microtime(true);
    $num = 0;
    for ($i = 0; $i < 200; ++$i) {
        $doc = new HtmlInspector\HtmlDocument($html);
        $selector = $doc->select(0)->descendant()->name('a');
        // Strict comparison against the -1 end-of-iteration sentinel, matching
        // the check used in extract_anchors().
        while (($node = $selector->iterate()) !== -1) {
            //$doc->get_outer_html($node);
            $href = $doc->get_attribute($node, 'href');
            // An <a> without href presumably yields null here (TODO confirm);
            // passing null to strlen() is deprecated since PHP 8.1, so count
            // it as an empty string explicitly.
            $num += strlen($href ?? '');
        }
    }
    // After the loop $i equals the number of completed passes.
    print($i / (microtime(true) - $time) . " / second");
    print(" (" . $num / $i . ")\n");
}
/**
 * Baseline benchmark using PHP's DOM extension: load ./test-html.html into a
 * DOMDocument 100 times and, on every pass, sum the href lengths of all
 * elements matched by the XPath query "//a".
 *
 * Prints "<passes per second> / second (<average summed href length per pass>)".
 */
function benchmark_libxml2()
{
    $markup = file_get_contents('./test-html.html');
    $started = microtime(true);
    $hrefBytes = 0;

    $pass = 0;
    while ($pass < 100) {
        $dom = new DOMDocument();
        // Parser errors and warnings are suppressed via the libxml flags.
        $dom->loadHTML($markup, LIBXML_NOERROR | LIBXML_NOWARNING);

        $finder = new DOMXPath($dom);
        $anchors = $finder->query('//a');
        foreach ($anchors as $anchor) {
            $hrefBytes += strlen($anchor->getAttribute('href'));
        }

        ++$pass;
    }

    // $pass now holds the number of completed passes.
    $rate = $pass / (microtime(true) - $started);
    print("{$rate} / second");

    $average = $hrefBytes / $pass;
    print(" ({$average})\n");
}
/**
 * Script driver: print the anchors found in test-html.html (resolved against
 * https://example.org/), then run the HtmlInspector and libxml2 benchmarks.
 */
function main()
{
    $html = file_get_contents("test-html.html");
    $document_uri = "https://example.org/";
    extract_anchors($html, $document_uri);

    benchmark_HtmlInspector();
    benchmark_libxml2();
}
// Entry point: main() is invoked unconditionally whenever this file is executed.
main();