Skip to content

Commit 62ad49f

Browse files
Googlertwifkak
authored andcommitted
AMP Packager: Extract <link rel=preload as=image> into Link header
Also updates the `<link>` generation to use `imagesrcset` and `imagesizes`, which are both supported in Chrome 73+. We don't need to do anything fancy! Re: #461 PiperOrigin-RevId: 337974534
1 parent 6810ff6 commit 62ad49f

File tree

8 files changed

+669
-355
lines changed

8 files changed

+669
-355
lines changed

transformer/request/request.pb.go

Lines changed: 520 additions & 227 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

transformer/request/request.proto

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,18 @@ message Metadata {
119119
string as = 2;
120120

121121
// The media attribute for image preload link. This attribute is useful
122-
// only for image links.
123-
string media = 3;
122+
// only for image links. Deprecated, instead use the `attributes` field.
123+
string media = 3 [deprecated = true];
124+
125+
message Attribute {
126+
string key = 1;
127+
128+
string val = 2;
129+
}
130+
131+
// Optional remaining attributes. This does not include the `href` or `as`
132+
// attributes.
133+
repeated Attribute attributes = 4;
124134
}
125135
// Absolute URLs of resources that should be preloaded when the AMP is
126136
// prefetched. In a signed exchange (SXG) context, these would be included as

transformer/transformer.go

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ var ampAttrRE = func() *regexp.Regexp {
112112
return r
113113
}()
114114

115+
// firstSrcsetSourceRE captures the first source URL from a srcset.
116+
var firstSrcsetSourceRE = regexp.MustCompile(`[^\s,]+`)
117+
115118
// The allowed AMP formats, and their serialization as an html "amp4" attribute.
116119
var ampFormatSuffixes = map[rpb.Request_HtmlFormat]string{
117120
rpb.Request_AMP: "",
@@ -198,20 +201,54 @@ func extractPreloads(dom *amphtml.DOM) []*rpb.Metadata_Preload {
198201
// If you add additional preloads here, verify that they can not be
199202
// unintentionally author supplied.
200203
preloads := []*rpb.Metadata_Preload{}
201-
for child := dom.HeadNode.FirstChild; child != nil; child = child.NextSibling {
202-
switch child.DataAtom {
204+
next := dom.HeadNode.FirstChild
205+
for next != nil {
206+
// Set the next node to visit
207+
current := next
208+
next = current.NextSibling
209+
210+
switch current.DataAtom {
203211
case atom.Script:
204-
if src, ok := htmlnode.GetAttributeVal(child, "", "src"); ok {
212+
if src, ok := htmlnode.GetAttributeVal(current, "", "src"); ok {
205213
preloads = append(preloads, &rpb.Metadata_Preload{Url: src, As: "script"})
206214
}
207215
case atom.Link:
208-
if rel, ok := htmlnode.GetAttributeVal(child, "", "rel"); ok {
216+
if rel, ok := htmlnode.GetAttributeVal(current, "", "rel"); ok {
209217
if strings.EqualFold(rel, "stylesheet") {
210-
if href, ok := htmlnode.GetAttributeVal(child, "", "href"); ok {
218+
if href, ok := htmlnode.GetAttributeVal(current, "", "href"); ok {
211219
preloads = append(preloads, &rpb.Metadata_Preload{Url: href, As: "style"})
212220
}
221+
} else if strings.EqualFold(rel, "preload") {
222+
if as, ok := htmlnode.GetAttributeVal(current, "", "as"); ok && strings.EqualFold(as, "image") {
223+
href, _ := htmlnode.GetAttributeVal(current, "", "href")
224+
// It's valid for a <link> to not have a href, but when we generate a Link header it must have one.
225+
if href == "" {
226+
imagesrcset, ok := htmlnode.GetAttributeVal(current, "", "imagesrcset")
227+
if !ok {
228+
continue
229+
}
230+
// The href doesn't really matter here. Browsers will ignore it and instead prioritize whichever source is selected from imagesrcset. However, the Link header *must* have it.
231+
// Stub this by just finding the first source URL possible.
232+
firstSource := firstSrcsetSourceRE.FindString(imagesrcset)
233+
if firstSource == "" {
234+
continue
235+
}
236+
href = firstSource
237+
}
238+
preload := &rpb.Metadata_Preload{Url: href, As: "image"}
239+
for _, attr := range current.Attr {
240+
if strings.EqualFold(attr.Key, "rel") || strings.EqualFold(attr.Key, "as") || strings.EqualFold(attr.Key, "href") {
241+
continue
242+
}
243+
preload.Attributes = append(preload.Attributes, &rpb.Metadata_Preload_Attribute{
244+
Key: attr.Key,
245+
Val: attr.Val,
246+
})
247+
}
248+
preloads = append(preloads, preload)
249+
htmlnode.RemoveNode(&current)
250+
}
213251
}
214-
// TODO: This should extract <link rel=preload>s with arbitrary attributes, if the href is is on-cache.
215252
}
216253
}
217254
if len(preloads) == maxPreloads {
@@ -313,12 +350,14 @@ func Process(r *rpb.Request) (string, *rpb.Metadata, error) {
313350
if err := runTransformers(context, fns); err != nil {
314351
return "", nil, err
315352
}
353+
// extractPreloads is an implicit transformer, and must run before printer.
354+
preloads := extractPreloads(context.DOM)
316355
var o strings.Builder
317356
if err := printer.Print(&o, context.DOM.RootNode); err != nil {
318357
return "", nil, err
319358
}
320359
metadata := rpb.Metadata{
321-
Preloads: extractPreloads(context.DOM),
360+
Preloads: preloads,
322361
MaxAgeSecs: computeMaxAgeSeconds(context.DOM),
323362
}
324363
return o.String(), &metadata, nil

transformer/transformer_test.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
rpb "github.com/ampproject/amppackager/transformer/request"
1010
"github.com/ampproject/amppackager/transformer/transformers"
1111
"github.com/google/go-cmp/cmp"
12+
"github.com/kylelemons/godebug/diff"
1213
"github.com/golang/protobuf/proto"
1314
)
1415

@@ -61,55 +62,91 @@ func TestPreloads(t *testing.T) {
6162
// Programmatically prepare the `> maxPreloads` test case.
6263
var manyScriptsHTML strings.Builder
6364
manyScriptsPreloads := []*rpb.Metadata_Preload{}
64-
manyScriptsHTML.WriteString("<html ⚡>")
65+
manyScriptsHTML.WriteString("<html ⚡><head>")
6566
for i := 0; i <= maxPreloads; i++ {
66-
fmt.Fprintf(&manyScriptsHTML, `<script src="foo%d"></script>`, i)
67+
fmt.Fprintf(&manyScriptsHTML, `<script src=foo%d></script>`, i)
6768
if i < maxPreloads {
6869
manyScriptsPreloads = append(manyScriptsPreloads, &rpb.Metadata_Preload{Url: fmt.Sprintf("foo%d", i), As: "script"})
6970
}
7071
}
7172

7273
tcs := []struct {
7374
html string
75+
expectedHTML string
7476
expectedPreloads []*rpb.Metadata_Preload
7577
}{
7678
{
7779
"<html ⚡><script>",
80+
"<html ⚡><head><script></script></head><body></body></html>",
7881
[]*rpb.Metadata_Preload{},
7982
},
8083
{
8184
"<html ⚡><script src=foo>",
85+
"<html ⚡><head><script src=foo></script></head><body></body></html>",
8286
[]*rpb.Metadata_Preload{{Url: "foo", As: "script"}},
8387
},
8488
{
8589
"<html ⚡><link rel=foaf href=foo>",
90+
"<html ⚡><head><link href=foo rel=foaf></head><body></body></html>",
8691
[]*rpb.Metadata_Preload{},
8792
},
8893
{
8994
"<html ⚡><link rel=stylesheet href=foo>",
95+
"<html ⚡><head><link href=foo rel=stylesheet></head><body></body></html>",
9096
[]*rpb.Metadata_Preload{{Url: "foo", As: "style"}},
9197
},
9298
{ // case-insensitive
9399
"<html ⚡><link rel=STYLEsheet href=foo>",
100+
"<html ⚡><head><link href=foo rel=STYLEsheet></head><body></body></html>",
94101
[]*rpb.Metadata_Preload{{Url: "foo", As: "style"}},
95102
},
96103
{
97104
"<html ⚡><link rel=stylesheet href=foo><script src=bar>",
105+
"<html ⚡><head><link href=foo rel=stylesheet><script src=bar></script></head><body></body></html>",
98106
[]*rpb.Metadata_Preload{{Url: "foo", As: "style"}, {Url: "bar", As: "script"}},
99107
},
100108
{
101109
manyScriptsHTML.String(),
110+
manyScriptsHTML.String() + "</head><body></body></html>",
102111
manyScriptsPreloads,
103112
},
113+
{
114+
"<html ⚡><link rel=preload href=foo>",
115+
"<html ⚡><head><link href=foo rel=preload></head><body></body></html>",
116+
[]*rpb.Metadata_Preload{},
117+
},
118+
{
119+
"<html ⚡><link rel=preload as=image href=foo>",
120+
"<html ⚡><head></head><body></body></html>",
121+
[]*rpb.Metadata_Preload{{Url: "foo", As: "image"}},
122+
},
123+
{
124+
"<html ⚡><link rel=preload as=image href=foo imagesrcset=bar>",
125+
"<html ⚡><head></head><body></body></html>",
126+
[]*rpb.Metadata_Preload{{Url: "foo", As: "image", Attributes: []*rpb.Metadata_Preload_Attribute{{Key: "imagesrcset", Val: "bar"}}}},
127+
},
128+
{
129+
"<html ⚡><link rel=preload as=image href=foo imagesrcset=bar imagesizes=100vw>",
130+
"<html ⚡><head></head><body></body></html>",
131+
[]*rpb.Metadata_Preload{{Url: "foo", As: "image", Attributes: []*rpb.Metadata_Preload_Attribute{{Key: "imagesrcset", Val: "bar"}, {Key: "imagesizes", Val: "100vw"}}}},
132+
},
133+
{
134+
"<html ⚡><link rel=preload as=image href=foo media=print>",
135+
"<html ⚡><head></head><body></body></html>",
136+
[]*rpb.Metadata_Preload{{Url: "foo", As: "image", Attributes: []*rpb.Metadata_Preload_Attribute{{Key: "media", Val: "print"}}}},
137+
},
104138
}
105139

106140
for _, tc := range tcs {
107141
t.Run(tc.html, func(t *testing.T) {
108-
_, metadata, err := Process(&rpb.Request{Html: tc.html, Config: rpb.Request_NONE})
142+
output, metadata, err := Process(&rpb.Request{Html: tc.html, Config: rpb.Request_NONE})
109143
if err != nil {
110144
t.Fatalf("unexpected failure: %v", err)
111145
}
112146

147+
if diff := diff.Diff(output, tc.expectedHTML); diff != "" {
148+
t.Errorf("html output differs (-want +got):\n%s", diff)
149+
}
113150
if diff := cmp.Diff(tc.expectedPreloads, metadata.Preloads, cmp.Comparer(proto.Equal)); diff != "" {
114151
t.Errorf("preloads differ (-want +got):\n%s", diff)
115152
}

transformer/transformers/preloadimage.go

Lines changed: 15 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515
package transformers
1616

1717
import (
18-
"fmt"
19-
"sort"
20-
"strconv"
2118
"strings"
2219

2320
"github.com/ampproject/amppackager/transformer/internal/amphtml"
@@ -31,15 +28,10 @@ const maxHeroImages int = 2
3128
type HeroImage struct {
3229
src string
3330
srcset string
31+
sizes string
3432
ampImg *html.Node
3533
}
3634

37-
// mediaQuerySource represents an individual source from a srcset with a unique media query that will only load the href when appropriate.
38-
type mediaQuerySource struct {
39-
href string
40-
media string
41-
}
42-
4335
// PreloadImage adds link rel="preload" to head element to preload the most revalent image in the AMP document,
4436
// and inserts an img tag if the image is an amp-img.
4537
func PreloadImage(e *Context) error {
@@ -73,7 +65,20 @@ func PreloadImage(e *Context) error {
7365
}
7466

7567
func prioritizeHeroImage(e *Context, heroImage HeroImage) {
76-
for _, link := range buildLinkPreloads(heroImage) {
68+
if heroImage.src != "" || heroImage.srcset != "" {
69+
link := htmlnode.Element("link",
70+
html.Attribute{Key: "rel", Val: "preload"},
71+
html.Attribute{Key: "as", Val: "image"},
72+
)
73+
if heroImage.src != "" {
74+
htmlnode.SetAttribute(link, "", "href", heroImage.src)
75+
}
76+
if heroImage.srcset != "" {
77+
htmlnode.SetAttribute(link, "", "imagesrcset", heroImage.srcset)
78+
}
79+
if heroImage.sizes != "" {
80+
htmlnode.SetAttribute(link, "", "imagesizes", heroImage.sizes)
81+
}
7782
e.DOM.HeadNode.AppendChild(link)
7883
}
7984

@@ -83,37 +88,6 @@ func prioritizeHeroImage(e *Context, heroImage HeroImage) {
8388
}
8489
}
8590

86-
func buildLinkPreloads(heroImage HeroImage) []*html.Node {
87-
// One of these is guaranteed.
88-
if heroImage.srcset != "" {
89-
if medias, ok := srcsetToMediaQueries(heroImage.srcset); ok {
90-
links := make([]*html.Node, len(medias))
91-
for i, mediaSrc := range medias {
92-
links[i] = htmlnode.Element("link",
93-
html.Attribute{Key: "rel", Val: "preload"},
94-
html.Attribute{Key: "as", Val: "image"},
95-
html.Attribute{Key: "href", Val: mediaSrc.href},
96-
html.Attribute{Key: "media", Val: mediaSrc.media},
97-
)
98-
}
99-
100-
return links
101-
}
102-
}
103-
104-
if heroImage.src == "" {
105-
return []*html.Node{}
106-
}
107-
108-
return []*html.Node{
109-
htmlnode.Element("link",
110-
html.Attribute{Key: "rel", Val: "preload"},
111-
html.Attribute{Key: "as", Val: "image"},
112-
html.Attribute{Key: "href", Val: heroImage.src},
113-
),
114-
}
115-
}
116-
11791
func buildImg(ampImg *html.Node) *html.Node {
11892
img := htmlnode.Element("img",
11993
html.Attribute{Key: "class", Val: "i-amphtml-fill-content i-amphtml-replaced-content"},
@@ -172,65 +146,6 @@ func ValidateSrc(in string, has bool) (string, bool) {
172146
return in, true
173147
}
174148

175-
// Converts the raw srcset value into multiple sources with an appropriate media query to select that source.
176-
// TODO(jridgewell, amaltas): Support pixel density with -webkit-max-device-pixel-ratio and -webkit-min-device-pixel-ratio
177-
func srcsetToMediaQueries(srcset string) ([]mediaQuerySource, bool) {
178-
type Source struct {
179-
href string
180-
size int
181-
}
182-
183-
srcSets := strings.Split(strings.TrimSpace(srcset), ",")
184-
length := len(srcSets)
185-
sources := make([]Source, length)
186-
medias := make([]mediaQuerySource, length)
187-
188-
if length == 0 {
189-
return medias, false
190-
}
191-
192-
for i, src := range srcSets {
193-
imgComponents := strings.Fields(src)
194-
if len(imgComponents) != 2 {
195-
return medias, false
196-
}
197-
198-
source := Source{imgComponents[0], 0}
199-
200-
if strings.HasSuffix(imgComponents[1], "w") {
201-
size, err := strconv.Atoi(strings.TrimSuffix(imgComponents[1], "w"))
202-
if err != nil {
203-
return medias, false
204-
}
205-
206-
source.size = size
207-
} else {
208-
return medias, false
209-
}
210-
211-
sources[i] = source
212-
}
213-
214-
// Sort the images based on their target sizes in asc order.
215-
sort.Slice(sources, func(i, j int) bool {
216-
return sources[i].size < sources[j].size
217-
})
218-
219-
for i, ci := range sources {
220-
var mediaQuery string
221-
if i == 0 {
222-
mediaQuery = fmt.Sprintf("(max-width: %dpx)", ci.size)
223-
} else if i == length-1 {
224-
mediaQuery = fmt.Sprintf("(min-width: %dpx)", sources[i-1].size+1)
225-
} else {
226-
mediaQuery = fmt.Sprintf("(min-width: %dpx) and (max-width: %dpx)", sources[i-1].size+1, ci.size)
227-
}
228-
229-
medias[i] = mediaQuerySource{ci.href, "screen and " + mediaQuery}
230-
}
231-
return medias, true
232-
}
233-
234149
func lazyLoadRemainingAmpImgs(n *html.Node) {
235150
for n != nil {
236151
if n.Data == "amp-img" {

0 commit comments

Comments
 (0)