Skip to content

Commit 94843c0

Browse files
committed
Introduce a transitional XPath format so existing URL Metrics will not be invalidated
1 parent c47a613 commit 94843c0

File tree

8 files changed

+205
-19
lines changed

8 files changed

+205
-19
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<html lang="en">
2+
<head>
3+
<meta charset="utf-8">
4+
<title>...</title>
5+
</head>
6+
<body>
7+
<div id="page">
8+
<img src="https://example.com/foo.jpg" alt="Foo" width="1200" height="800">
9+
</div>
10+
</body>
11+
</html>

plugins/image-prioritizer/tests/test-cases/common-lcp-image-with-old-xpath-format/expected.html

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
return static function ( Test_Image_Prioritizer_Helper $test_case ): void {
3+
$test_case->populate_url_metrics(
4+
array(
5+
array(
6+
// Note: This is intentionally using old XPath scheme. This is to make sure that the old format still results in the expected optimization during a transitional period.
7+
'xpath' => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::DIV]/*[1][self::IMG]',
8+
'isLCP' => true,
9+
),
10+
),
11+
false
12+
);
13+
};

plugins/optimization-detective/class-od-element.php

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ class OD_Element implements ArrayAccess, JsonSerializable {
3333
*/
3434
protected $data;
3535

36+
/**
37+
* Transitional XPath.
38+
*
39+
* @since n.e.x.t
40+
* @var non-empty-string|null
41+
*/
42+
protected $transitional_xpath = null;
43+
3644
/**
3745
* URL Metric that this element belongs to.
3846
*
@@ -54,16 +62,6 @@ class OD_Element implements ArrayAccess, JsonSerializable {
5462
public function __construct( array $data, OD_URL_Metric $url_metric ) {
5563
$this->data = $data;
5664

57-
// Convert old XPath scheme.
58-
$xpath = preg_replace(
59-
'#^/\*\[1\]\[self::HTML\]/\*\[2\]\[self::BODY\]/\*\[\d+\]\[self::([a-zA-Z0-9:_-]+)\]#',
60-
'/HTML/BODY/$1',
61-
$this->data['xpath']
62-
);
63-
if ( is_string( $xpath ) && '' !== $xpath ) {
64-
$this->data['xpath'] = $xpath;
65-
}
66-
6765
$this->url_metric = $url_metric;
6866
}
6967

@@ -100,6 +98,9 @@ public function get_url_metric_group(): ?OD_URL_Metric_Group {
10098
* @return mixed|null The property value, or null if not set.
10199
*/
102100
public function get( string $key ) {
101+
if ( 'xpath' === $key ) {
102+
return $this->get_xpath();
103+
}
103104
return $this->data[ $key ] ?? null;
104105
}
105106

@@ -129,11 +130,58 @@ public function is_lcp_candidate(): bool {
129130
* Gets XPath for element.
130131
*
131132
* @since 0.7.0
133+
* @since n.e.x.t Returns the transitional XPath format. To access the underlying raw XPath, access the 'xpath' key of the jsonSerialize response.
132134
*
133135
* @return non-empty-string XPath.
134136
*/
135137
public function get_xpath(): string {
136-
return $this->data['xpath'];
138+
139+
if ( ! isset( $this->transitional_xpath ) ) {
140+
$replacements = array(
141+
142+
/*
143+
* Convert the original XPath format for elements in the BODY.
144+
*
145+
* Example:
146+
* /*[1][self::HTML]/*[2][self::BODY]/*[1][self::DIV]/*[1][self::IMG]
147+
* =>
148+
* /HTML/BODY/DIV/*[1][self::IMG]
149+
*/
150+
'#^/\*\[1]\[self::HTML]/\*\[2]\[self::BODY]/\*\[\d+]\[self::([a-zA-Z0-9:_-]+)]#' => '/HTML/BODY/$1',
151+
152+
/*
153+
* Convert the original XPath format for elements in the HEAD.
154+
*
155+
* Example:
156+
* /*[1][self::HTML]/*[1][self::HEAD]/*[1][self::META]
157+
* =>
158+
* /HTML/HEAD/*[1][self::META]
159+
*/
160+
'#^/\*\[1\]\[self::HTML\]/\*\[1\]\[self::HEAD]#' => '/HTML/HEAD',
161+
162+
/*
163+
* Convert the new XPath format for elements in the BODY.
164+
*
165+
* Note that the new XPath format for elements in the HEAD does not need to be converted to the
166+
* transitional format since disambiguating attributes are not used in the HEAD.
167+
*
168+
* Example:
169+
* /HTML/BODY/DIV[@id='page']/*[1][self::IMG]
170+
* =>
171+
* /HTML/BODY/DIV/*[1][self::IMG]
172+
*/
173+
'#^(/HTML/BODY/\w+)\[@[^\]]+?]#' => '$1',
174+
);
175+
foreach ( $replacements as $search => $replace ) {
176+
$xpath = preg_replace( $search, $replace, $this->data['xpath'], -1, $count );
177+
if ( $count > 0 ) {
178+
$this->transitional_xpath = $xpath;
179+
break;
180+
}
181+
}
182+
}
183+
184+
return $this->transitional_xpath ?? $this->data['xpath'];
137185
}
138186

139187
/**
@@ -200,6 +248,9 @@ public function offsetExists( $offset ): bool {
200248
*/
201249
#[ReturnTypeWillChange]
202250
public function offsetGet( $offset ) {
251+
if ( 'xpath' === $offset ) {
252+
return $this->get_xpath();
253+
}
203254
return $this->data[ $offset ] ?? null;
204255
}
205256

plugins/optimization-detective/class-od-html-tag-processor.php

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,17 @@ final class OD_HTML_Tag_Processor extends WP_HTML_Tag_Processor {
195195
*/
196196
private $current_xpath = null;
197197

198+
/**
199+
* Transitional XPath for the current tag.
200+
*
201+
* This is used to store the old XPath format in a transitional period until which new URL Metrics are expected to
202+
* have been collected to purge out references to the old format.
203+
*
204+
* @since n.e.x.t
205+
* @var string|null
206+
*/
207+
private $transitional_current_xpath = null;
208+
198209
/**
199210
* Whether the previous tag does not expect a closer.
200211
*
@@ -300,7 +311,8 @@ public function expects_closer( ?string $tag_name = null ): bool {
300311
* @return bool Whether a token was parsed.
301312
*/
302313
public function next_token(): bool {
303-
$this->current_xpath = null; // Clear cache.
314+
$this->current_xpath = null; // Clear cache.
315+
$this->transitional_current_xpath = null; // Clear cache.
304316
++$this->cursor_move_count;
305317
if ( ! parent::next_token() ) {
306318
$this->open_stack_tags = array();
@@ -629,9 +641,31 @@ private function is_foreign_element(): bool {
629641
*
630642
* @since 0.4.0
631643
*
644+
* @param bool $transitional_format Whether to use the transitional XPath format. Default true.
632645
* @return string XPath.
633646
*/
634-
public function get_xpath(): string {
647+
public function get_xpath( bool $transitional_format = true ): string {
648+
/*
649+
* This transitional format is used by default for all extensions. The non-transitional format is used only in
650+
* od_optimize_template_output_buffer() when setting the data-od-xpath attribute. This is so that the new format
651+
* will replace the old format as new URL Metrics are collected. After a month of the new format being live, the
652+
* transitional format can be eliminated. See the corresponding logic in OD_Element for normalizing both the
653+
* old and new XPath formats to use the transitional format.
654+
*/
655+
if ( $transitional_format ) {
656+
if ( null === $this->transitional_current_xpath ) {
657+
$this->transitional_current_xpath = '';
658+
foreach ( $this->get_indexed_breadcrumbs() as $i => list( $tag_name, $index, $attributes ) ) {
659+
if ( $i < 2 || ( 2 === $i && '/HTML/BODY' === $this->transitional_current_xpath ) ) {
660+
$this->transitional_current_xpath .= "/$tag_name";
661+
} else {
662+
$this->transitional_current_xpath .= sprintf( '/*[%d][self::%s]', $index + 1, $tag_name );
663+
}
664+
}
665+
}
666+
return $this->transitional_current_xpath;
667+
}
668+
635669
if ( null === $this->current_xpath ) {
636670
$this->current_xpath = '';
637671
foreach ( $this->get_indexed_breadcrumbs() as $i => list( $tag_name, $index, $attributes ) ) {

plugins/optimization-detective/optimization.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,9 @@ function od_optimize_template_output_buffer( string $buffer ): string {
249249
$processor->release_bookmark( $current_tag_bookmark );
250250

251251
if ( $tracked_in_url_metrics && $needs_detection ) {
252-
$processor->set_meta_attribute( 'xpath', $processor->get_xpath() );
252+
// Note: This is explicitly using the non-transitional XPath format since this is what will get saved in newly-submitted URL Metrics.
253+
$xpath = $processor->get_xpath( false );
254+
$processor->set_meta_attribute( 'xpath', $xpath );
253255
}
254256
} while ( $processor->next_open_tag() );
255257

plugins/optimization-detective/tests/test-class-od-element.php

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ static function ( array $schema ): array {
3939
);
4040

4141
$element_data = array(
42-
'xpath' => '/HTML/BODY/DIV/*[1][self::IMG]',
42+
'xpath' => '/HTML/BODY/HEADER/*[1][self::IMG]',
4343
'isLCP' => false,
4444
'isLCPCandidate' => true,
4545
'intersectionRatio' => 0.123,
@@ -148,4 +148,56 @@ static function ( array $schema ): array {
148148
}
149149
$this->assertInstanceOf( Exception::class, $exception ); // @phpstan-ignore method.impossibleType (It is thrown by offsetUnset actually.)
150150
}
151+
152+
/**
153+
* Data provider.
154+
*
155+
* @return array<string, mixed>
156+
*/
157+
public function data_provider_test_transitional_get_xpath(): array {
158+
return array(
159+
'current-without-disambiguating-attr-on-body-child' => array(
160+
'xpath' => '/HTML/BODY/HEADER/*[1][self::IMG]',
161+
'expected' => '/HTML/BODY/HEADER/*[1][self::IMG]',
162+
),
163+
'current-with-disambiguating-attr-on-body-child' => array(
164+
'xpath' => '/HTML/BODY/DIV[@id=\'page\']/*[1][self::IMG]',
165+
'expected' => '/HTML/BODY/DIV/*[1][self::IMG]',
166+
),
167+
'old-format-body' => array(
168+
'xpath' => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::DIV]/*[1][self::IMG]',
169+
'expected' => '/HTML/BODY/DIV/*[1][self::IMG]',
170+
),
171+
'old-format-head' => array(
172+
'xpath' => '/*[1][self::HTML]/*[1][self::HEAD]/*[1][self::META]',
173+
'expected' => '/HTML/HEAD/*[1][self::META]',
174+
),
175+
);
176+
}
177+
178+
/**
179+
* Test that get_xpath() converts to the transitional format.
180+
*
181+
* @dataProvider data_provider_test_transitional_get_xpath
182+
* @covers ::get_xpath
183+
*/
184+
public function test_transitional_get_xpath( string $xpath, string $expected ): void {
185+
186+
$element_data = array(
187+
'xpath' => $xpath,
188+
'isLCP' => false,
189+
'isLCPCandidate' => true,
190+
'intersectionRatio' => 0.123,
191+
'intersectionRect' => $this->get_sample_dom_rect(),
192+
'boundingClientRect' => $this->get_sample_dom_rect(),
193+
);
194+
195+
$url_metric = $this->get_sample_url_metric( array( 'element' => $element_data ) );
196+
$element = $url_metric->get_elements()[0];
197+
198+
$this->assertSame( $expected, $element->get_xpath() );
199+
$this->assertSame( $expected, $element->get( 'xpath' ) );
200+
$this->assertSame( $expected, $element['xpath'] );
201+
$this->assertSame( $xpath, $url_metric->jsonSerialize()['elements'][0]['xpath'] );
202+
}
151203
}

plugins/optimization-detective/tests/test-class-od-html-tag-processor.php

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -433,16 +433,26 @@ public function data_provider_sample_documents(): array {
433433
*/
434434
public function test_next_tag_and_get_xpath( string $document, array $open_tags, array $xpath_breadcrumbs ): void {
435435
$p = new OD_HTML_Tag_Processor( $document );
436-
$this->assertSame( '', $p->get_xpath(), 'Expected empty XPath since iteration has not started.' );
436+
$this->assertSame( '', $p->get_xpath( false ), 'Expected empty XPath since iteration has not started.' );
437437
$actual_open_tags = array();
438438
$actual_xpath_breadcrumbs_mapping = array();
439439
while ( $p->next_open_tag() ) {
440440
$actual_open_tags[] = $p->get_tag();
441441

442-
$xpath = $p->get_xpath();
442+
$xpath = $p->get_xpath( false );
443443
$this->assertArrayNotHasKey( $xpath, $actual_xpath_breadcrumbs_mapping, 'Each tag must have a unique XPath.' );
444444

445445
$actual_xpath_breadcrumbs_mapping[ $xpath ] = $p->get_breadcrumbs();
446+
447+
$transitional_xpath = $p->get_xpath( true );
448+
$this->assertRegExp(
449+
'#^/HTML(
450+
/HEAD(/\*\[\d+]\[self::\w+])?
451+
|
452+
/BODY(/DIV(/\*\[\d+]\[self::\w+])*)?
453+
)?$#x',
454+
$transitional_xpath
455+
);
446456
}
447457

448458
$this->assertSame( $open_tags, $actual_open_tags, "Expected list of open tags to match.\nSnapshot: " . $this->export_array_snapshot( $actual_open_tags, true ) );
@@ -615,7 +625,7 @@ public function test_bookmarking_and_seeking(): void {
615625
$bookmarks[] = $bookmark;
616626
$actual_figure_contents[] = array(
617627
'tag' => $processor->get_tag(),
618-
'xpath' => $processor->get_xpath(),
628+
'xpath' => $processor->get_xpath( false ),
619629
'depth' => $processor->get_current_depth(),
620630
);
621631
}
@@ -656,7 +666,7 @@ public function test_bookmarking_and_seeking(): void {
656666
$processor->seek( $bookmark );
657667
$sought_actual_contents[] = array(
658668
'tag' => $processor->get_tag(),
659-
'xpath' => $processor->get_xpath(),
669+
'xpath' => $processor->get_xpath( false ),
660670
'depth' => $processor->get_current_depth(),
661671
);
662672
}

0 commit comments

Comments
 (0)